aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2010-10-25 19:55:38 -0400
committerDavid S. Miller <davem@davemloft.net>2010-10-27 14:37:33 -0400
commitb914c4ea929a4ba6fb97967800dc473c31552b98 (patch)
tree630f7c81d82e580e5aa5164c0e4cf1d0dadca3ba
parent7a2b03c5175e9ddcc2a2d48ca86dea8a88b68383 (diff)
inetpeer: __rcu annotations
Adds __rcu annotations to inetpeer (struct inet_peer)->avl_left (struct inet_peer)->avl_right This is a tedious cleanup, but removes one smp_wmb() from link_to_pool() since we now use more self documenting rcu_assign_pointer(). Note the use of RCU_INIT_POINTER() instead of rcu_assign_pointer() in all cases we dont need a memory barrier. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inetpeer.h2
-rw-r--r--net/ipv4/inetpeer.c138
2 files changed, 81 insertions, 59 deletions
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 417d0c894f29..fe239bfe5f7f 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -15,7 +15,7 @@
15 15
16struct inet_peer { 16struct inet_peer {
17 /* group together avl_left,avl_right,v4daddr to speedup lookups */ 17 /* group together avl_left,avl_right,v4daddr to speedup lookups */
18 struct inet_peer *avl_left, *avl_right; 18 struct inet_peer __rcu *avl_left, *avl_right;
19 __be32 v4daddr; /* peer's address */ 19 __be32 v4daddr; /* peer's address */
20 __u32 avl_height; 20 __u32 avl_height;
21 struct list_head unused; 21 struct list_head unused;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9ffa24b9a804..9e94d7cf4f8a 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -72,18 +72,19 @@ static struct kmem_cache *peer_cachep __read_mostly;
72#define node_height(x) x->avl_height 72#define node_height(x) x->avl_height
73 73
74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) 74#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
75#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node)
75static const struct inet_peer peer_fake_node = { 76static const struct inet_peer peer_fake_node = {
76 .avl_left = peer_avl_empty, 77 .avl_left = peer_avl_empty_rcu,
77 .avl_right = peer_avl_empty, 78 .avl_right = peer_avl_empty_rcu,
78 .avl_height = 0 79 .avl_height = 0
79}; 80};
80 81
81static struct { 82static struct {
82 struct inet_peer *root; 83 struct inet_peer __rcu *root;
83 spinlock_t lock; 84 spinlock_t lock;
84 int total; 85 int total;
85} peers = { 86} peers = {
86 .root = peer_avl_empty, 87 .root = peer_avl_empty_rcu,
87 .lock = __SPIN_LOCK_UNLOCKED(peers.lock), 88 .lock = __SPIN_LOCK_UNLOCKED(peers.lock),
88 .total = 0, 89 .total = 0,
89}; 90};
@@ -156,11 +157,14 @@ static void unlink_from_unused(struct inet_peer *p)
156 */ 157 */
157#define lookup(_daddr, _stack) \ 158#define lookup(_daddr, _stack) \
158({ \ 159({ \
159 struct inet_peer *u, **v; \ 160 struct inet_peer *u; \
161 struct inet_peer __rcu **v; \
160 \ 162 \
161 stackptr = _stack; \ 163 stackptr = _stack; \
162 *stackptr++ = &peers.root; \ 164 *stackptr++ = &peers.root; \
163 for (u = peers.root; u != peer_avl_empty; ) { \ 165 for (u = rcu_dereference_protected(peers.root, \
166 lockdep_is_held(&peers.lock)); \
167 u != peer_avl_empty; ) { \
164 if (_daddr == u->v4daddr) \ 168 if (_daddr == u->v4daddr) \
165 break; \ 169 break; \
166 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ 170 if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \
@@ -168,7 +172,8 @@ static void unlink_from_unused(struct inet_peer *p)
168 else \ 172 else \
169 v = &u->avl_right; \ 173 v = &u->avl_right; \
170 *stackptr++ = v; \ 174 *stackptr++ = v; \
171 u = *v; \ 175 u = rcu_dereference_protected(*v, \
176 lockdep_is_held(&peers.lock)); \
172 } \ 177 } \
173 u; \ 178 u; \
174}) 179})
@@ -209,13 +214,17 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
209/* Called with local BH disabled and the pool lock held. */ 214/* Called with local BH disabled and the pool lock held. */
210#define lookup_rightempty(start) \ 215#define lookup_rightempty(start) \
211({ \ 216({ \
212 struct inet_peer *u, **v; \ 217 struct inet_peer *u; \
218 struct inet_peer __rcu **v; \
213 *stackptr++ = &start->avl_left; \ 219 *stackptr++ = &start->avl_left; \
214 v = &start->avl_left; \ 220 v = &start->avl_left; \
215 for (u = *v; u->avl_right != peer_avl_empty; ) { \ 221 for (u = rcu_dereference_protected(*v, \
222 lockdep_is_held(&peers.lock)); \
223 u->avl_right != peer_avl_empty_rcu; ) { \
216 v = &u->avl_right; \ 224 v = &u->avl_right; \
217 *stackptr++ = v; \ 225 *stackptr++ = v; \
218 u = *v; \ 226 u = rcu_dereference_protected(*v, \
227 lockdep_is_held(&peers.lock)); \
219 } \ 228 } \
220 u; \ 229 u; \
221}) 230})
@@ -224,74 +233,86 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
224 * Variable names are the proof of operation correctness. 233 * Variable names are the proof of operation correctness.
225 * Look into mm/map_avl.c for more detail description of the ideas. 234 * Look into mm/map_avl.c for more detail description of the ideas.
226 */ 235 */
227static void peer_avl_rebalance(struct inet_peer **stack[], 236static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
228 struct inet_peer ***stackend) 237 struct inet_peer __rcu ***stackend)
229{ 238{
230 struct inet_peer **nodep, *node, *l, *r; 239 struct inet_peer __rcu **nodep;
240 struct inet_peer *node, *l, *r;
231 int lh, rh; 241 int lh, rh;
232 242
233 while (stackend > stack) { 243 while (stackend > stack) {
234 nodep = *--stackend; 244 nodep = *--stackend;
235 node = *nodep; 245 node = rcu_dereference_protected(*nodep,
236 l = node->avl_left; 246 lockdep_is_held(&peers.lock));
237 r = node->avl_right; 247 l = rcu_dereference_protected(node->avl_left,
248 lockdep_is_held(&peers.lock));
249 r = rcu_dereference_protected(node->avl_right,
250 lockdep_is_held(&peers.lock));
238 lh = node_height(l); 251 lh = node_height(l);
239 rh = node_height(r); 252 rh = node_height(r);
240 if (lh > rh + 1) { /* l: RH+2 */ 253 if (lh > rh + 1) { /* l: RH+2 */
241 struct inet_peer *ll, *lr, *lrl, *lrr; 254 struct inet_peer *ll, *lr, *lrl, *lrr;
242 int lrh; 255 int lrh;
243 ll = l->avl_left; 256 ll = rcu_dereference_protected(l->avl_left,
244 lr = l->avl_right; 257 lockdep_is_held(&peers.lock));
258 lr = rcu_dereference_protected(l->avl_right,
259 lockdep_is_held(&peers.lock));
245 lrh = node_height(lr); 260 lrh = node_height(lr);
246 if (lrh <= node_height(ll)) { /* ll: RH+1 */ 261 if (lrh <= node_height(ll)) { /* ll: RH+1 */
247 node->avl_left = lr; /* lr: RH or RH+1 */ 262 RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */
248 node->avl_right = r; /* r: RH */ 263 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
249 node->avl_height = lrh + 1; /* RH+1 or RH+2 */ 264 node->avl_height = lrh + 1; /* RH+1 or RH+2 */
250 l->avl_left = ll; /* ll: RH+1 */ 265 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */
251 l->avl_right = node; /* node: RH+1 or RH+2 */ 266 RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */
252 l->avl_height = node->avl_height + 1; 267 l->avl_height = node->avl_height + 1;
253 *nodep = l; 268 RCU_INIT_POINTER(*nodep, l);
254 } else { /* ll: RH, lr: RH+1 */ 269 } else { /* ll: RH, lr: RH+1 */
255 lrl = lr->avl_left; /* lrl: RH or RH-1 */ 270 lrl = rcu_dereference_protected(lr->avl_left,
256 lrr = lr->avl_right; /* lrr: RH or RH-1 */ 271 lockdep_is_held(&peers.lock)); /* lrl: RH or RH-1 */
257 node->avl_left = lrr; /* lrr: RH or RH-1 */ 272 lrr = rcu_dereference_protected(lr->avl_right,
258 node->avl_right = r; /* r: RH */ 273 lockdep_is_held(&peers.lock)); /* lrr: RH or RH-1 */
274 RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */
275 RCU_INIT_POINTER(node->avl_right, r); /* r: RH */
259 node->avl_height = rh + 1; /* node: RH+1 */ 276 node->avl_height = rh + 1; /* node: RH+1 */
260 l->avl_left = ll; /* ll: RH */ 277 RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */
261 l->avl_right = lrl; /* lrl: RH or RH-1 */ 278 RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */
262 l->avl_height = rh + 1; /* l: RH+1 */ 279 l->avl_height = rh + 1; /* l: RH+1 */
263 lr->avl_left = l; /* l: RH+1 */ 280 RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */
264 lr->avl_right = node; /* node: RH+1 */ 281 RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */
265 lr->avl_height = rh + 2; 282 lr->avl_height = rh + 2;
266 *nodep = lr; 283 RCU_INIT_POINTER(*nodep, lr);
267 } 284 }
268 } else if (rh > lh + 1) { /* r: LH+2 */ 285 } else if (rh > lh + 1) { /* r: LH+2 */
269 struct inet_peer *rr, *rl, *rlr, *rll; 286 struct inet_peer *rr, *rl, *rlr, *rll;
270 int rlh; 287 int rlh;
271 rr = r->avl_right; 288 rr = rcu_dereference_protected(r->avl_right,
272 rl = r->avl_left; 289 lockdep_is_held(&peers.lock));
290 rl = rcu_dereference_protected(r->avl_left,
291 lockdep_is_held(&peers.lock));
273 rlh = node_height(rl); 292 rlh = node_height(rl);
274 if (rlh <= node_height(rr)) { /* rr: LH+1 */ 293 if (rlh <= node_height(rr)) { /* rr: LH+1 */
275 node->avl_right = rl; /* rl: LH or LH+1 */ 294 RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */
276 node->avl_left = l; /* l: LH */ 295 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
277 node->avl_height = rlh + 1; /* LH+1 or LH+2 */ 296 node->avl_height = rlh + 1; /* LH+1 or LH+2 */
278 r->avl_right = rr; /* rr: LH+1 */ 297 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */
279 r->avl_left = node; /* node: LH+1 or LH+2 */ 298 RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */
280 r->avl_height = node->avl_height + 1; 299 r->avl_height = node->avl_height + 1;
281 *nodep = r; 300 RCU_INIT_POINTER(*nodep, r);
282 } else { /* rr: RH, rl: RH+1 */ 301 } else { /* rr: RH, rl: RH+1 */
283 rlr = rl->avl_right; /* rlr: LH or LH-1 */ 302 rlr = rcu_dereference_protected(rl->avl_right,
284 rll = rl->avl_left; /* rll: LH or LH-1 */ 303 lockdep_is_held(&peers.lock)); /* rlr: LH or LH-1 */
285 node->avl_right = rll; /* rll: LH or LH-1 */ 304 rll = rcu_dereference_protected(rl->avl_left,
286 node->avl_left = l; /* l: LH */ 305 lockdep_is_held(&peers.lock)); /* rll: LH or LH-1 */
306 RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */
307 RCU_INIT_POINTER(node->avl_left, l); /* l: LH */
287 node->avl_height = lh + 1; /* node: LH+1 */ 308 node->avl_height = lh + 1; /* node: LH+1 */
288 r->avl_right = rr; /* rr: LH */ 309 RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */
289 r->avl_left = rlr; /* rlr: LH or LH-1 */ 310 RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */
290 r->avl_height = lh + 1; /* r: LH+1 */ 311 r->avl_height = lh + 1; /* r: LH+1 */
291 rl->avl_right = r; /* r: LH+1 */ 312 RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */
292 rl->avl_left = node; /* node: LH+1 */ 313 RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */
293 rl->avl_height = lh + 2; 314 rl->avl_height = lh + 2;
294 *nodep = rl; 315 RCU_INIT_POINTER(*nodep, rl);
295 } 316 }
296 } else { 317 } else {
297 node->avl_height = (lh > rh ? lh : rh) + 1; 318 node->avl_height = (lh > rh ? lh : rh) + 1;
@@ -303,10 +324,10 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
303#define link_to_pool(n) \ 324#define link_to_pool(n) \
304do { \ 325do { \
305 n->avl_height = 1; \ 326 n->avl_height = 1; \
306 n->avl_left = peer_avl_empty; \ 327 n->avl_left = peer_avl_empty_rcu; \
307 n->avl_right = peer_avl_empty; \ 328 n->avl_right = peer_avl_empty_rcu; \
308 smp_wmb(); /* lockless readers can catch us now */ \ 329 /* lockless readers can catch us now */ \
309 **--stackptr = n; \ 330 rcu_assign_pointer(**--stackptr, n); \
310 peer_avl_rebalance(stack, stackptr); \ 331 peer_avl_rebalance(stack, stackptr); \
311} while (0) 332} while (0)
312 333
@@ -330,24 +351,25 @@ static void unlink_from_pool(struct inet_peer *p)
330 * We use refcnt=-1 to alert lockless readers this entry is deleted. 351 * We use refcnt=-1 to alert lockless readers this entry is deleted.
331 */ 352 */
332 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { 353 if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
333 struct inet_peer **stack[PEER_MAXDEPTH]; 354 struct inet_peer __rcu **stack[PEER_MAXDEPTH];
334 struct inet_peer ***stackptr, ***delp; 355 struct inet_peer __rcu ***stackptr, ***delp;
335 if (lookup(p->v4daddr, stack) != p) 356 if (lookup(p->v4daddr, stack) != p)
336 BUG(); 357 BUG();
337 delp = stackptr - 1; /* *delp[0] == p */ 358 delp = stackptr - 1; /* *delp[0] == p */
338 if (p->avl_left == peer_avl_empty) { 359 if (p->avl_left == peer_avl_empty_rcu) {
339 *delp[0] = p->avl_right; 360 *delp[0] = p->avl_right;
340 --stackptr; 361 --stackptr;
341 } else { 362 } else {
342 /* look for a node to insert instead of p */ 363 /* look for a node to insert instead of p */
343 struct inet_peer *t; 364 struct inet_peer *t;
344 t = lookup_rightempty(p); 365 t = lookup_rightempty(p);
345 BUG_ON(*stackptr[-1] != t); 366 BUG_ON(rcu_dereference_protected(*stackptr[-1],
367 lockdep_is_held(&peers.lock)) != t);
346 **--stackptr = t->avl_left; 368 **--stackptr = t->avl_left;
347 /* t is removed, t->v4daddr > x->v4daddr for any 369 /* t is removed, t->v4daddr > x->v4daddr for any
348 * x in p->avl_left subtree. 370 * x in p->avl_left subtree.
349 * Put t in the old place of p. */ 371 * Put t in the old place of p. */
350 *delp[0] = t; 372 RCU_INIT_POINTER(*delp[0], t);
351 t->avl_left = p->avl_left; 373 t->avl_left = p->avl_left;
352 t->avl_right = p->avl_right; 374 t->avl_right = p->avl_right;
353 t->avl_height = p->avl_height; 375 t->avl_height = p->avl_height;
@@ -414,7 +436,7 @@ static int cleanup_once(unsigned long ttl)
414struct inet_peer *inet_getpeer(__be32 daddr, int create) 436struct inet_peer *inet_getpeer(__be32 daddr, int create)
415{ 437{
416 struct inet_peer *p; 438 struct inet_peer *p;
417 struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; 439 struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
418 440
419 /* Look up for the address quickly, lockless. 441 /* Look up for the address quickly, lockless.
420 * Because of a concurrent writer, we might not find an existing entry. 442 * Because of a concurrent writer, we might not find an existing entry.