author    David S. Miller <davem@davemloft.net>    2010-11-30 14:41:59 -0500
committer David S. Miller <davem@davemloft.net>    2010-11-30 14:41:59 -0500
commit    98158f5a853cafd33b254ae0eacc0dd69f90b93b (patch)
tree      ccffd30760090c9b20d89d9bb30ed50246b699e6    /net/ipv4/inetpeer.c
parent    c20ec76157747434652e721cdd4dccd8654ad370 (diff)
inetpeer: Abstract out the tree root accesses.
Instead of directly accessing "peers", change the code to operate using a "struct inet_peer_base *" pointer. This will facilitate the addition of a separate tree for ipv6 peer entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/inetpeer.c')
-rw-r--r--    net/ipv4/inetpeer.c    119
1 file changed, 69 insertions(+), 50 deletions(-)
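The gist of the patch is that every tree operation now receives a "struct inet_peer_base *" instead of touching the single global "peers" struct directly. For illustration only, here is a minimal sketch of how such a base could later be selected per address family; the v6_peers base and the family switch below are hypothetical and are not part of this patch (in this patch family_to_base() simply returns &v4_peers). The sketch assumes the declarations already present in net/ipv4/inetpeer.c (peer_avl_empty_rcu, struct inet_peer_base).

/* Hypothetical follow-up, not part of this patch: with the tree state
 * wrapped in struct inet_peer_base, a second base for IPv6 peers could
 * be added and picked by address family.
 */
static struct inet_peer_base v6_peers = {	/* hypothetical */
	.root	= peer_avl_empty_rcu,
	.lock	= __SPIN_LOCK_UNLOCKED(v6_peers.lock),
	.total	= 0,
};

static struct inet_peer_base *family_to_base(int family)
{
	/* dispatch on family instead of unconditionally using &v4_peers */
	return family == AF_INET ? &v4_peers : &v6_peers;
}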
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 9e94d7cf4f8a..f94400848921 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -79,13 +79,13 @@ static const struct inet_peer peer_fake_node = {
 	.avl_height	= 0
 };
 
-static struct {
+static struct inet_peer_base {
 	struct inet_peer __rcu *root;
 	spinlock_t	lock;
 	int		total;
-} peers = {
+} v4_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(v4_peers.lock),
 	.total		= 0,
 };
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -155,15 +155,15 @@ static void unlink_from_unused(struct inet_peer *p)
 /*
  * Called with local BH disabled and the pool lock held.
  */
-#define lookup(_daddr, _stack) \
+#define lookup(_daddr, _stack, _base) \
 ({ \
 	struct inet_peer *u; \
 	struct inet_peer __rcu **v; \
 	\
 	stackptr = _stack; \
-	*stackptr++ = &peers.root; \
-	for (u = rcu_dereference_protected(peers.root, \
-			lockdep_is_held(&peers.lock)); \
+	*stackptr++ = &_base->root; \
+	for (u = rcu_dereference_protected(_base->root, \
+			lockdep_is_held(&_base->lock)); \
 	     u != peer_avl_empty; ) { \
 		if (_daddr == u->v4daddr) \
 			break; \
@@ -173,7 +173,7 @@ static void unlink_from_unused(struct inet_peer *p)
 			v = &u->avl_right; \
 		*stackptr++ = v; \
 		u = rcu_dereference_protected(*v, \
-			lockdep_is_held(&peers.lock)); \
+			lockdep_is_held(&_base->lock)); \
 	} \
 	u; \
 })
@@ -185,9 +185,9 @@ static void unlink_from_unused(struct inet_peer *p)
  * But every pointer we follow is guaranteed to be valid thanks to RCU.
  * We exit from this function if number of links exceeds PEER_MAXDEPTH
  */
-static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base)
 {
-	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	struct inet_peer *u = rcu_dereference_bh(base->root);
 	int count = 0;
 
 	while (u != peer_avl_empty) {
@@ -212,19 +212,19 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
 }
 
 /* Called with local BH disabled and the pool lock held. */
-#define lookup_rightempty(start) \
+#define lookup_rightempty(start, base) \
 ({ \
 	struct inet_peer *u; \
 	struct inet_peer __rcu **v; \
 	*stackptr++ = &start->avl_left; \
 	v = &start->avl_left; \
 	for (u = rcu_dereference_protected(*v, \
-			lockdep_is_held(&peers.lock)); \
+			lockdep_is_held(&base->lock)); \
 	     u->avl_right != peer_avl_empty_rcu; ) { \
 		v = &u->avl_right; \
 		*stackptr++ = v; \
 		u = rcu_dereference_protected(*v, \
-			lockdep_is_held(&peers.lock)); \
+			lockdep_is_held(&base->lock)); \
 	} \
 	u; \
 })
@@ -234,7 +234,8 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
  * Look into mm/map_avl.c for more detail description of the ideas.
  */
 static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
-		struct inet_peer __rcu ***stackend)
+		struct inet_peer __rcu ***stackend,
+		struct inet_peer_base *base)
 {
 	struct inet_peer __rcu **nodep;
 	struct inet_peer *node, *l, *r;
@@ -243,20 +244,20 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 	while (stackend > stack) {
 		nodep = *--stackend;
 		node = rcu_dereference_protected(*nodep,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		l = rcu_dereference_protected(node->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		r = rcu_dereference_protected(node->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 		lh = node_height(l);
 		rh = node_height(r);
 		if (lh > rh + 1) { /* l: RH+2 */
 			struct inet_peer *ll, *lr, *lrl, *lrr;
 			int lrh;
 			ll = rcu_dereference_protected(l->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			lr = rcu_dereference_protected(l->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			lrh = node_height(lr);
 			if (lrh <= node_height(ll)) { /* ll: RH+1 */
 				RCU_INIT_POINTER(node->avl_left, lr);	/* lr: RH or RH+1 */
@@ -268,9 +269,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				RCU_INIT_POINTER(*nodep, l);
 			} else { /* ll: RH, lr: RH+1 */
 				lrl = rcu_dereference_protected(lr->avl_left,
-					lockdep_is_held(&peers.lock));	/* lrl: RH or RH-1 */
+					lockdep_is_held(&base->lock));	/* lrl: RH or RH-1 */
 				lrr = rcu_dereference_protected(lr->avl_right,
-					lockdep_is_held(&peers.lock));	/* lrr: RH or RH-1 */
+					lockdep_is_held(&base->lock));	/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_left, lrr);	/* lrr: RH or RH-1 */
 				RCU_INIT_POINTER(node->avl_right, r);	/* r: RH */
 				node->avl_height = rh + 1; /* node: RH+1 */
@@ -286,9 +287,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 			struct inet_peer *rr, *rl, *rlr, *rll;
 			int rlh;
 			rr = rcu_dereference_protected(r->avl_right,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			rl = rcu_dereference_protected(r->avl_left,
-				lockdep_is_held(&peers.lock));
+				lockdep_is_held(&base->lock));
 			rlh = node_height(rl);
 			if (rlh <= node_height(rr)) { /* rr: LH+1 */
 				RCU_INIT_POINTER(node->avl_right, rl);	/* rl: LH or LH+1 */
@@ -300,9 +301,9 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 				RCU_INIT_POINTER(*nodep, r);
 			} else { /* rr: RH, rl: RH+1 */
 				rlr = rcu_dereference_protected(rl->avl_right,
-					lockdep_is_held(&peers.lock));	/* rlr: LH or LH-1 */
+					lockdep_is_held(&base->lock));	/* rlr: LH or LH-1 */
 				rll = rcu_dereference_protected(rl->avl_left,
-					lockdep_is_held(&peers.lock));	/* rll: LH or LH-1 */
+					lockdep_is_held(&base->lock));	/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_right, rll);	/* rll: LH or LH-1 */
 				RCU_INIT_POINTER(node->avl_left, l);	/* l: LH */
 				node->avl_height = lh + 1; /* node: LH+1 */
@@ -321,14 +322,14 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[],
 }
 
 /* Called with local BH disabled and the pool lock held. */
-#define link_to_pool(n) \
+#define link_to_pool(n, base) \
 do { \
 	n->avl_height = 1; \
 	n->avl_left = peer_avl_empty_rcu; \
 	n->avl_right = peer_avl_empty_rcu; \
 	/* lockless readers can catch us now */ \
 	rcu_assign_pointer(**--stackptr, n); \
-	peer_avl_rebalance(stack, stackptr); \
+	peer_avl_rebalance(stack, stackptr, base); \
 } while (0)
 
 static void inetpeer_free_rcu(struct rcu_head *head)
@@ -337,13 +338,13 @@ static void inetpeer_free_rcu(struct rcu_head *head)
 }
 
 /* May be called with local BH enabled. */
-static void unlink_from_pool(struct inet_peer *p)
+static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base)
 {
 	int do_free;
 
 	do_free = 0;
 
-	spin_lock_bh(&peers.lock);
+	spin_lock_bh(&base->lock);
 	/* Check the reference counter.  It was artificially incremented by 1
 	 * in cleanup() function to prevent sudden disappearing.  If we can
 	 * atomically (because of lockless readers) take this last reference,
@@ -353,7 +354,7 @@ static void unlink_from_pool(struct inet_peer *p)
 	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer __rcu **stack[PEER_MAXDEPTH];
 		struct inet_peer __rcu ***stackptr, ***delp;
-		if (lookup(p->v4daddr, stack) != p)
+		if (lookup(p->v4daddr, stack, base) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty_rcu) {
@@ -362,9 +363,9 @@ static void unlink_from_pool(struct inet_peer *p)
 		} else {
 			/* look for a node to insert instead of p */
 			struct inet_peer *t;
-			t = lookup_rightempty(p);
+			t = lookup_rightempty(p, base);
 			BUG_ON(rcu_dereference_protected(*stackptr[-1],
-					lockdep_is_held(&peers.lock)) != t);
+					lockdep_is_held(&base->lock)) != t);
 			**--stackptr = t->avl_left;
 			/* t is removed, t->v4daddr > x->v4daddr for any
 			 * x in p->avl_left subtree.
@@ -376,11 +377,11 @@ static void unlink_from_pool(struct inet_peer *p)
 			BUG_ON(delp[1] != &p->avl_left);
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
-		peer_avl_rebalance(stack, stackptr);
-		peers.total--;
+		peer_avl_rebalance(stack, stackptr, base);
+		base->total--;
 		do_free = 1;
 	}
-	spin_unlock_bh(&peers.lock);
+	spin_unlock_bh(&base->lock);
 
 	if (do_free)
 		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
@@ -395,6 +396,11 @@ static void unlink_from_pool(struct inet_peer *p)
 	inet_putpeer(p);
 }
 
+static struct inet_peer_base *peer_to_base(struct inet_peer *p)
+{
+	return &v4_peers;
+}
+
 /* May be called with local BH enabled. */
 static int cleanup_once(unsigned long ttl)
 {
@@ -428,21 +434,27 @@ static int cleanup_once(unsigned long ttl)
 		 * happen because of entry limits in route cache. */
 		return -1;
 
-	unlink_from_pool(p);
+	unlink_from_pool(p, peer_to_base(p));
 	return 0;
 }
 
+static struct inet_peer_base *family_to_base(int family)
+{
+	return &v4_peers;
+}
+
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p;
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
+	struct inet_peer_base *base = family_to_base(AF_INET);
+	struct inet_peer *p;
 
 	/* Look up for the address quickly, lockless.
 	 * Because of a concurrent writer, we might not find an existing entry.
 	 */
 	rcu_read_lock_bh();
-	p = lookup_rcu_bh(daddr);
+	p = lookup_rcu_bh(daddr, base);
 	rcu_read_unlock_bh();
 
 	if (p) {
@@ -456,11 +468,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	/* retry an exact lookup, taking the lock before.
 	 * At least, nodes should be hot in our cache.
 	 */
-	spin_lock_bh(&peers.lock);
-	p = lookup(daddr, stack);
+	spin_lock_bh(&base->lock);
+	p = lookup(daddr, stack, base);
 	if (p != peer_avl_empty) {
 		atomic_inc(&p->refcnt);
-		spin_unlock_bh(&peers.lock);
+		spin_unlock_bh(&base->lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
@@ -476,30 +488,36 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 
 		/* Link the node. */
-		link_to_pool(p);
-		peers.total++;
+		link_to_pool(p, base);
+		base->total++;
 	}
-	spin_unlock_bh(&peers.lock);
+	spin_unlock_bh(&base->lock);
 
-	if (peers.total >= inet_peer_threshold)
+	if (base->total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
 	return p;
 }
 
+static int compute_total(void)
+{
+	return v4_peers.total;
+}
+
 /* Called with local BH disabled. */
 static void peer_check_expire(unsigned long dummy)
 {
 	unsigned long now = jiffies;
-	int ttl;
+	int ttl, total;
 
-	if (peers.total >= inet_peer_threshold)
+	total = compute_total();
+	if (total >= inet_peer_threshold)
 		ttl = inet_peer_minttl;
 	else
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-					peers.total / inet_peer_threshold * HZ;
+					total / inet_peer_threshold * HZ;
 	while (!cleanup_once(ttl)) {
 		if (jiffies != now)
 			break;
@@ -508,13 +526,14 @@ static void peer_check_expire(unsigned long dummy)
 	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
 	 * interval depending on the total number of entries (more entries,
 	 * less interval). */
-	if (peers.total >= inet_peer_threshold)
+	total = compute_total();
+	if (total >= inet_peer_threshold)
 		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
 	else
 		peer_periodic_timer.expires = jiffies
 			+ inet_peer_gc_maxtime
 			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-				peers.total / inet_peer_threshold * HZ;
+				total / inet_peer_threshold * HZ;
 	add_timer(&peer_periodic_timer);
 }
 