author	Eric Dumazet <eric.dumazet@gmail.com>	2010-10-25 19:55:38 -0400
committer	David S. Miller <davem@davemloft.net>	2010-10-27 14:37:33 -0400
commit	b914c4ea929a4ba6fb97967800dc473c31552b98 (patch)
tree	630f7c81d82e580e5aa5164c0e4cf1d0dadca3ba
parent	7a2b03c5175e9ddcc2a2d48ca86dea8a88b68383 (diff)
inetpeer: __rcu annotations
Adds __rcu annotations to inetpeer:

	(struct inet_peer)->avl_left
	(struct inet_peer)->avl_right

This is a tedious cleanup, but it removes one smp_wmb() from
link_to_pool(), since we now use the more self-documenting
rcu_assign_pointer().

Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer()
in all cases where we don't need a memory barrier.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
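
As background for the barrier note above: a minimal sketch of the publish/read
pairing this patch relies on. publish_node() and read_node() are hypothetical
helpers for illustration, not part of the patch; rcu_assign_pointer(),
RCU_INIT_POINTER() and rcu_dereference() are the real kernel macros.

/*
 * Writer side: a node that is not yet reachable by readers can be
 * initialized with plain stores (or RCU_INIT_POINTER() for __rcu
 * fields); only the single store that makes it visible needs
 * rcu_assign_pointer(), which orders the initialization before the
 * publication.
 */
static void publish_node(struct inet_peer __rcu **slot, struct inet_peer *n)
{
	n->avl_height = 1;			/* not visible yet: no barrier needed */
	n->avl_left = peer_avl_empty_rcu;
	n->avl_right = peer_avl_empty_rcu;
	rcu_assign_pointer(*slot, n);		/* publish: implies the write barrier */
}

/*
 * Reader side: rcu_dereference() pairs with rcu_assign_pointer() and
 * must run inside an RCU read-side critical section.
 */
static struct inet_peer *read_node(struct inet_peer __rcu **slot)
{
	return rcu_dereference(*slot);
}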
-rw-r--r--	include/net/inetpeer.h	  2
-rw-r--r--	net/ipv4/inetpeer.c	138
2 files changed, 81 insertions, 59 deletions
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 417d0c894f29..fe239bfe5f7f 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -15,7 +15,7 @@
 
 struct inet_peer {
 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
-	struct inet_peer	*avl_left, *avl_right;
+	struct inet_peer __rcu	*avl_left, *avl_right;
 	__be32			v4daddr;	/* peer's address */
 	__u32			avl_height;
 	struct list_head	unused;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
 #define node_height(x) x->avl_height
 
 #define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
 static const struct inet_peer peer_fake_node = {
-	.avl_left	= peer_avl_empty,
-	.avl_right	= peer_avl_empty,
+	.avl_left	= peer_avl_empty_rcu,
+	.avl_right	= peer_avl_empty_rcu,
 	.avl_height	= 0
 };
 
 static struct {
-	struct inet_peer *root;
+	struct inet_peer __rcu *root;
 	spinlock_t	lock;
 	int		total;
 } peers = {
-	.root		= peer_avl_empty,
+	.root		= peer_avl_empty_rcu,
 	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
 	.total		= 0,
 };
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
  */
 #define lookup(_daddr, _stack) 				\
 ({								\
-	struct inet_peer *u, **v;				\
+	struct inet_peer *u;					\
+	struct inet_peer __rcu **v;				\
 								\
 	stackptr = _stack;					\
 	*stackptr++ = &peers.root;				\
-	for (u = peers.root; u != peer_avl_empty; ) {		\
+	for (u = rcu_dereference_protected(peers.root,		\
+			lockdep_is_held(&peers.lock));		\
+	     u != peer_avl_empty; ) {				\
 		if (_daddr == u->v4daddr)			\
 			break;					\
 		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
 		else						\
 			v = &u->avl_right;			\
 		*stackptr++ = v;				\
-		u = *v;						\
+		u = rcu_dereference_protected(*v,		\
+			lockdep_is_held(&peers.lock));		\
 	}							\
 	u;							\
 })
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
 /* Called with local BH disabled and the pool lock held. */
 #define lookup_rightempty(start)				\
 ({								\
-	struct inet_peer *u, **v;				\
+	struct inet_peer *u;					\
+	struct inet_peer __rcu **v;				\
 	*stackptr++ = &start->avl_left;				\
 	v = &start->avl_left;					\
-	for (u = *v; u->avl_right != peer_avl_empty; ) {	\
+	for (u = rcu_dereference_protected(*v,			\
+			lockdep_is_held(&peers.lock));		\
+	     u->avl_right != peer_avl_empty_rcu; ) {		\
 		v = &u->avl_right;				\
 		*stackptr++ = v;				\
-		u = *v;						\
+		u = rcu_dereference_protected(*v,		\
+			lockdep_is_held(&peers.lock));		\
 	}							\
 	u;							\
 })
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
  * Variable names are the proof of operation correctness.
  * Look into mm/map_avl.c for more detail description of the ideas.
  */
-static void peer_avl_rebalance(struct inet_peer **stack[],
-		struct inet_peer ***stackend)
+static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
+		struct inet_peer __rcu ***stackend)
 {
-	struct inet_peer **nodep, *node, *l, *r;
+	struct inet_peer __rcu **nodep;
+	struct inet_peer *node, *l, *r;
 	int lh, rh;
 
 	while (stackend > stack) {
 		nodep = *--stackend;
-		node = *nodep;
-		l = node->avl_left;
-		r = node->avl_right;
+		node = rcu_dereference_protected(*nodep,
+				lockdep_is_held(&peers.lock));
+		l = rcu_dereference_protected(node->avl_left,
+				lockdep_is_held(&peers.lock));
+		r = rcu_dereference_protected(node->avl_right,
+				lockdep_is_held(&peers.lock));
 		lh = node_height(l);
 		rh = node_height(r);
 		if (lh > rh + 1) { /* l: RH+2 */
 			struct inet_peer *ll, *lr, *lrl, *lrr;
 			int lrh;
-			ll = l->avl_left;
-			lr = l->avl_right;
+			ll = rcu_dereference_protected(l->avl_left,
+					lockdep_is_held(&peers.lock));
+			lr = rcu_dereference_protected(l->avl_right,
+					lockdep_is_held(&peers.lock));
 			lrh = node_height(lr);
 			if (lrh <= node_height(ll)) {	/* ll: RH+1 */
-				node->avl_left = lr;	/* lr: RH or RH+1 */
-				node->avl_right = r;	/* r: RH */
+				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
+				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
 				node->avl_height = lrh + 1; /* RH+1 or RH+2 */
-				l->avl_left = ll;	/* ll: RH+1 */
-				l->avl_right = node;	/* node: RH+1 or RH+2 */
+				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH+1 */
+				RCU_INIT_POINTER(l->avl_right, node);	/* node: RH+1 or RH+2 */
 				l->avl_height = node->avl_height + 1;
-				*nodep = l;
+				RCU_INIT_POINTER(*nodep, l);
 			} else { /* ll: RH, lr: RH+1 */
-				lrl = lr->avl_left;	/* lrl: RH or RH-1 */
-				lrr = lr->avl_right;	/* lrr: RH or RH-1 */
-				node->avl_left = lrr;	/* lrr: RH or RH-1 */
-				node->avl_right = r;	/* r: RH */
+				lrl = rcu_dereference_protected(lr->avl_left,
+					lockdep_is_held(&peers.lock));	/* lrl: RH or RH-1 */
+				lrr = rcu_dereference_protected(lr->avl_right,
+					lockdep_is_held(&peers.lock));	/* lrr: RH or RH-1 */
+				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
+				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
 				node->avl_height = rh + 1; /* node: RH+1 */
-				l->avl_left = ll;	/* ll: RH */
-				l->avl_right = lrl;	/* lrl: RH or RH-1 */
+				RCU_INIT_POINTER(l->avl_left, ll);	/* ll: RH */
+				RCU_INIT_POINTER(l->avl_right, lrl);	/* lrl: RH or RH-1 */
 				l->avl_height = rh + 1;	/* l: RH+1 */
-				lr->avl_left = l;	/* l: RH+1 */
-				lr->avl_right = node;	/* node: RH+1 */
+				RCU_INIT_POINTER(lr->avl_left, l);	/* l: RH+1 */
+				RCU_INIT_POINTER(lr->avl_right, node);	/* node: RH+1 */
 				lr->avl_height = rh + 2;
-				*nodep = lr;
+				RCU_INIT_POINTER(*nodep, lr);
 			}
 		} else if (rh > lh + 1) { /* r: LH+2 */
 			struct inet_peer *rr, *rl, *rlr, *rll;
 			int rlh;
-			rr = r->avl_right;
-			rl = r->avl_left;
+			rr = rcu_dereference_protected(r->avl_right,
+					lockdep_is_held(&peers.lock));
+			rl = rcu_dereference_protected(r->avl_left,
+					lockdep_is_held(&peers.lock));
 			rlh = node_height(rl);
 			if (rlh <= node_height(rr)) {	/* rr: LH+1 */
-				node->avl_right = rl;	/* rl: LH or LH+1 */
-				node->avl_left = l;	/* l: LH */
+				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
+				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
 				node->avl_height = rlh + 1; /* LH+1 or LH+2 */
-				r->avl_right = rr;	/* rr: LH+1 */
-				r->avl_left = node;	/* node: LH+1 or LH+2 */
+				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH+1 */
+				RCU_INIT_POINTER(r->avl_left, node);	/* node: LH+1 or LH+2 */
 				r->avl_height = node->avl_height + 1;
-				*nodep = r;
+				RCU_INIT_POINTER(*nodep, r);
 			} else { /* rr: RH, rl: RH+1 */
-				rlr = rl->avl_right;	/* rlr: LH or LH-1 */
-				rll = rl->avl_left;	/* rll: LH or LH-1 */
-				node->avl_right = rll;	/* rll: LH or LH-1 */
-				node->avl_left = l;	/* l: LH */
+				rlr = rcu_dereference_protected(rl->avl_right,
+					lockdep_is_held(&peers.lock));	/* rlr: LH or LH-1 */
+				rll = rcu_dereference_protected(rl->avl_left,
+					lockdep_is_held(&peers.lock));	/* rll: LH or LH-1 */
+				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
+				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
 				node->avl_height = lh + 1; /* node: LH+1 */
-				r->avl_right = rr;	/* rr: LH */
-				r->avl_left = rlr;	/* rlr: LH or LH-1 */
+				RCU_INIT_POINTER(r->avl_right, rr);	/* rr: LH */
+				RCU_INIT_POINTER(r->avl_left, rlr);	/* rlr: LH or LH-1 */
 				r->avl_height = lh + 1;	/* r: LH+1 */
-				rl->avl_right = r;	/* r: LH+1 */
-				rl->avl_left = node;	/* node: LH+1 */
+				RCU_INIT_POINTER(rl->avl_right, r);	/* r: LH+1 */
+				RCU_INIT_POINTER(rl->avl_left, node);	/* node: LH+1 */
 				rl->avl_height = lh + 2;
-				*nodep = rl;
+				RCU_INIT_POINTER(*nodep, rl);
 			}
 		} else {
 			node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
 #define link_to_pool(n)					\
 do {								\
 	n->avl_height = 1;					\
-	n->avl_left = peer_avl_empty;				\
-	n->avl_right = peer_avl_empty;				\
-	smp_wmb(); /* lockless readers can catch us now */	\
-	**--stackptr = n;					\
+	n->avl_left = peer_avl_empty_rcu;			\
+	n->avl_right = peer_avl_empty_rcu;			\
+	/* lockless readers can catch us now */			\
+	rcu_assign_pointer(**--stackptr, n);			\
 	peer_avl_rebalance(stack, stackptr);			\
 } while (0)
 
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
 	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
 	 */
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
-		struct inet_peer **stack[PEER_MAXDEPTH];
-		struct inet_peer ***stackptr, ***delp;
+		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
+		struct inet_peer __rcu ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
-		if (p->avl_left == peer_avl_empty) {
+		if (p->avl_left == peer_avl_empty_rcu) {
 			*delp[0] = p->avl_right;
 			--stackptr;
 		} else {
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
 			t = lookup_rightempty(p);
-			BUG_ON(*stackptr[-1] != t);
+			BUG_ON(rcu_dereference_protected(*stackptr[-1],
+					lockdep_is_held(&peers.lock)) != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->v4daddr > x->v4daddr for any
 			 * x in p->avl_left subtree.
 			 * Put t in the old place of p. */
-			*delp[0] = t;
+			RCU_INIT_POINTER(*delp[0], t);
 			t->avl_left = p->avl_left;
 			t->avl_right = p->avl_right;
 			t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
 	struct inet_peer *p;
-	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
 
 	/* Look up for the address quickly, lockless.
 	 * Because of a concurrent writer, we might not find an existing entry.
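
Every update-side load in this patch goes through rcu_dereference_protected():
the pool lock already serializes updaters, so a plain load is correct, and the
lockdep_is_held() condition lets lockdep verify that claim while keeping
sparse happy about the __rcu annotation. A minimal sketch of the idiom
(pool_root_height() is a hypothetical helper for illustration, not in the tree):

/* Hypothetical helper: read the root's AVL height under the pool lock. */
static __u32 pool_root_height(void)
{
	struct inet_peer *root;
	__u32 h;

	spin_lock_bh(&peers.lock);
	/*
	 * No RCU read-side critical section needed: the lock excludes
	 * concurrent updaters, and lockdep checks the stated condition.
	 */
	root = rcu_dereference_protected(peers.root,
			lockdep_is_held(&peers.lock));
	h = root->avl_height;	/* peer_fake_node yields 0 for an empty tree */
	spin_unlock_bh(&peers.lock);
	return h;
}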