aboutsummaryrefslogtreecommitdiffstats
path: root/net/xfrm
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2006-08-24 07:45:07 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2006-09-22 18:08:48 -0400
commit2518c7c2b3d7f0a6b302b4efe17c911f8dd4049f (patch)
tree7de05ca17d76eee141d4feff3b7b27d850557ae6 /net/xfrm
parentc1969f294e624d5b642fc8e6ab9468b7c7791fa8 (diff)
[XFRM]: Hash policies when non-prefixed.
This idea is from Alexey Kuznetsov. It is common for policies to be non-prefixed. And for that case we can optimize lookups, insert, etc. quite a bit. For each direction, we have a dynamically sized policy hash table for non-prefixed policies. We also have a hash table on policy->index. For prefixed policies, we have a list per-direction which we will consult on lookups when a non-prefix hashtable lookup fails. This still isn't as efficient as I would like it. There are four immediate problems: 1) Lots of excessive refcounting, which can be fixed just like xfrm_state was 2) We do 2 hash probes on insert, one to look for dups and one to allocate a unique policy->index. Althought I wonder how much this matters since xfrm_state inserts do up to 3 hash probes and that seems to perform fine. 3) xfrm_policy_insert() is very complex because of the priority ordering and entry replacement logic. 4) Lots of counter bumping, in addition to policy refcounts, in the form of xfrm_policy_count[]. This is merely used to let code path(s) know that some IPSEC rules exist. So this count is indexed per-direction, maybe that is overkill. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/xfrm')
-rw-r--r--net/xfrm/xfrm_policy.c681
1 files changed, 541 insertions, 140 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 1bcaae4adf3a..087a5443b051 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -22,6 +22,9 @@
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/netfilter.h> 23#include <linux/netfilter.h>
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/bootmem.h>
26#include <linux/vmalloc.h>
27#include <linux/cache.h>
25#include <net/xfrm.h> 28#include <net/xfrm.h>
26#include <net/ip.h> 29#include <net/ip.h>
27 30
@@ -30,26 +33,8 @@ EXPORT_SYMBOL(xfrm_cfg_mutex);
30 33
31static DEFINE_RWLOCK(xfrm_policy_lock); 34static DEFINE_RWLOCK(xfrm_policy_lock);
32 35
33struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; 36unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
34EXPORT_SYMBOL(xfrm_policy_list); 37EXPORT_SYMBOL(xfrm_policy_count);
35#ifdef CONFIG_XFRM_SUB_POLICY
36struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2];
37EXPORT_SYMBOL(xfrm_policy_list_sub);
38
39#define XFRM_POLICY_LISTS(type) \
40 ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \
41 xfrm_policy_list)
42#define XFRM_POLICY_LISTHEAD(type, dir) \
43 ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \
44 xfrm_policy_list[dir])
45#define XFRM_POLICY_LISTHEADP(type, dir) \
46 ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \
47 &xfrm_policy_list[dir])
48#else
49#define XFRM_POLICY_LISTS(type) xfrm_policy_list
50#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir]
51#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir]
52#endif
53 38
54static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); 39static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
55static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; 40static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
@@ -57,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
57static kmem_cache_t *xfrm_dst_cache __read_mostly; 42static kmem_cache_t *xfrm_dst_cache __read_mostly;
58 43
59static struct work_struct xfrm_policy_gc_work; 44static struct work_struct xfrm_policy_gc_work;
60static struct list_head xfrm_policy_gc_list = 45static HLIST_HEAD(xfrm_policy_gc_list);
61 LIST_HEAD_INIT(xfrm_policy_gc_list);
62static DEFINE_SPINLOCK(xfrm_policy_gc_lock); 46static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
63 47
64static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); 48static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
@@ -328,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
328 policy = kzalloc(sizeof(struct xfrm_policy), gfp); 312 policy = kzalloc(sizeof(struct xfrm_policy), gfp);
329 313
330 if (policy) { 314 if (policy) {
331 atomic_set(&policy->refcnt, 1); 315 INIT_HLIST_NODE(&policy->bydst);
316 INIT_HLIST_NODE(&policy->byidx);
332 rwlock_init(&policy->lock); 317 rwlock_init(&policy->lock);
318 atomic_set(&policy->refcnt, 1);
333 init_timer(&policy->timer); 319 init_timer(&policy->timer);
334 policy->timer.data = (unsigned long)policy; 320 policy->timer.data = (unsigned long)policy;
335 policy->timer.function = xfrm_policy_timer; 321 policy->timer.function = xfrm_policy_timer;
@@ -375,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
375static void xfrm_policy_gc_task(void *data) 361static void xfrm_policy_gc_task(void *data)
376{ 362{
377 struct xfrm_policy *policy; 363 struct xfrm_policy *policy;
378 struct list_head *entry, *tmp; 364 struct hlist_node *entry, *tmp;
379 struct list_head gc_list = LIST_HEAD_INIT(gc_list); 365 struct hlist_head gc_list;
380 366
381 spin_lock_bh(&xfrm_policy_gc_lock); 367 spin_lock_bh(&xfrm_policy_gc_lock);
382 list_splice_init(&xfrm_policy_gc_list, &gc_list); 368 gc_list.first = xfrm_policy_gc_list.first;
369 INIT_HLIST_HEAD(&xfrm_policy_gc_list);
383 spin_unlock_bh(&xfrm_policy_gc_lock); 370 spin_unlock_bh(&xfrm_policy_gc_lock);
384 371
385 list_for_each_safe(entry, tmp, &gc_list) { 372 hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
386 policy = list_entry(entry, struct xfrm_policy, list);
387 xfrm_policy_gc_kill(policy); 373 xfrm_policy_gc_kill(policy);
388 }
389} 374}
390 375
391/* Rule must be locked. Release descentant resources, announce 376/* Rule must be locked. Release descentant resources, announce
@@ -407,70 +392,354 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
407 } 392 }
408 393
409 spin_lock(&xfrm_policy_gc_lock); 394 spin_lock(&xfrm_policy_gc_lock);
410 list_add(&policy->list, &xfrm_policy_gc_list); 395 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
411 spin_unlock(&xfrm_policy_gc_lock); 396 spin_unlock(&xfrm_policy_gc_lock);
412 397
413 schedule_work(&xfrm_policy_gc_work); 398 schedule_work(&xfrm_policy_gc_work);
414} 399}
415 400
401struct xfrm_policy_hash {
402 struct hlist_head *table;
403 unsigned int hmask;
404};
405
406static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
407static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
408static struct hlist_head *xfrm_policy_byidx __read_mostly;
409static unsigned int xfrm_idx_hmask __read_mostly;
410static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
411
412static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
413{
414 return (index ^ (index >> 8)) & hmask;
415}
416
417static inline unsigned int idx_hash(u32 index)
418{
419 return __idx_hash(index, xfrm_idx_hmask);
420}
421
422static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask)
423{
424 xfrm_address_t *daddr = &sel->daddr;
425 xfrm_address_t *saddr = &sel->saddr;
426 unsigned int h = 0;
427
428 switch (family) {
429 case AF_INET:
430 if (sel->prefixlen_d != 32 ||
431 sel->prefixlen_s != 32)
432 return hmask + 1;
433
434 h = ntohl(daddr->a4 ^ saddr->a4);
435 break;
436
437 case AF_INET6:
438 if (sel->prefixlen_d != 128 ||
439 sel->prefixlen_s != 128)
440 return hmask + 1;
441
442 h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
443 saddr->a6[2] ^ saddr->a6[3]);
444 break;
445 };
446 h ^= (h >> 16);
447 return h & hmask;
448}
449
450static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask)
451{
452 unsigned int h = 0;
453
454 switch (family) {
455 case AF_INET:
456 h = ntohl(daddr->a4 ^ saddr->a4);
457 break;
458
459 case AF_INET6:
460 h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^
461 saddr->a6[2] ^ saddr->a6[3]);
462 break;
463 };
464 h ^= (h >> 16);
465 return h & hmask;
466}
467
468static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
469{
470 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
471 unsigned int hash = __sel_hash(sel, family, hmask);
472
473 return (hash == hmask + 1 ?
474 &xfrm_policy_inexact[dir] :
475 xfrm_policy_bydst[dir].table + hash);
476}
477
478static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
479{
480 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
481 unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
482
483 return xfrm_policy_bydst[dir].table + hash;
484}
485
486static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz)
487{
488 struct hlist_head *n;
489
490 if (sz <= PAGE_SIZE)
491 n = kmalloc(sz, GFP_KERNEL);
492 else if (hashdist)
493 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
494 else
495 n = (struct hlist_head *)
496 __get_free_pages(GFP_KERNEL, get_order(sz));
497
498 if (n)
499 memset(n, 0, sz);
500
501 return n;
502}
503
504static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz)
505{
506 if (sz <= PAGE_SIZE)
507 kfree(n);
508 else if (hashdist)
509 vfree(n);
510 else
511 free_pages((unsigned long)n, get_order(sz));
512}
513
514static void xfrm_dst_hash_transfer(struct hlist_head *list,
515 struct hlist_head *ndsttable,
516 unsigned int nhashmask)
517{
518 struct hlist_node *entry, *tmp;
519 struct xfrm_policy *pol;
520
521 hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
522 unsigned int h;
523
524 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
525 pol->family, nhashmask);
526 hlist_add_head(&pol->bydst, ndsttable+h);
527 }
528}
529
530static void xfrm_idx_hash_transfer(struct hlist_head *list,
531 struct hlist_head *nidxtable,
532 unsigned int nhashmask)
533{
534 struct hlist_node *entry, *tmp;
535 struct xfrm_policy *pol;
536
537 hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
538 unsigned int h;
539
540 h = __idx_hash(pol->index, nhashmask);
541 hlist_add_head(&pol->byidx, nidxtable+h);
542 }
543}
544
545static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
546{
547 return ((old_hmask + 1) << 1) - 1;
548}
549
550static void xfrm_bydst_resize(int dir)
551{
552 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
553 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
554 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
555 struct hlist_head *odst = xfrm_policy_bydst[dir].table;
556 struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize);
557 int i;
558
559 if (!ndst)
560 return;
561
562 write_lock_bh(&xfrm_policy_lock);
563
564 for (i = hmask; i >= 0; i--)
565 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
566
567 xfrm_policy_bydst[dir].table = ndst;
568 xfrm_policy_bydst[dir].hmask = nhashmask;
569
570 write_unlock_bh(&xfrm_policy_lock);
571
572 xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
573}
574
575static void xfrm_byidx_resize(int total)
576{
577 unsigned int hmask = xfrm_idx_hmask;
578 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
579 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
580 struct hlist_head *oidx = xfrm_policy_byidx;
581 struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize);
582 int i;
583
584 if (!nidx)
585 return;
586
587 write_lock_bh(&xfrm_policy_lock);
588
589 for (i = hmask; i >= 0; i--)
590 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
591
592 xfrm_policy_byidx = nidx;
593 xfrm_idx_hmask = nhashmask;
594
595 write_unlock_bh(&xfrm_policy_lock);
596
597 xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
598}
599
600static inline int xfrm_bydst_should_resize(int dir, int *total)
601{
602 unsigned int cnt = xfrm_policy_count[dir];
603 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
604
605 if (total)
606 *total += cnt;
607
608 if ((hmask + 1) < xfrm_policy_hashmax &&
609 cnt > hmask)
610 return 1;
611
612 return 0;
613}
614
615static inline int xfrm_byidx_should_resize(int total)
616{
617 unsigned int hmask = xfrm_idx_hmask;
618
619 if ((hmask + 1) < xfrm_policy_hashmax &&
620 total > hmask)
621 return 1;
622
623 return 0;
624}
625
626static DEFINE_MUTEX(hash_resize_mutex);
627
628static void xfrm_hash_resize(void *__unused)
629{
630 int dir, total;
631
632 mutex_lock(&hash_resize_mutex);
633
634 total = 0;
635 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
636 if (xfrm_bydst_should_resize(dir, &total))
637 xfrm_bydst_resize(dir);
638 }
639 if (xfrm_byidx_should_resize(total))
640 xfrm_byidx_resize(total);
641
642 mutex_unlock(&hash_resize_mutex);
643}
644
645static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
646
416/* Generate new index... KAME seems to generate them ordered by cost 647/* Generate new index... KAME seems to generate them ordered by cost
417 * of an absolute inpredictability of ordering of rules. This will not pass. */ 648 * of an absolute inpredictability of ordering of rules. This will not pass. */
418static u32 xfrm_gen_index(u8 type, int dir) 649static u32 xfrm_gen_index(u8 type, int dir)
419{ 650{
420 u32 idx;
421 struct xfrm_policy *p;
422 static u32 idx_generator; 651 static u32 idx_generator;
423 652
424 for (;;) { 653 for (;;) {
654 struct hlist_node *entry;
655 struct hlist_head *list;
656 struct xfrm_policy *p;
657 u32 idx;
658 int found;
659
425 idx = (idx_generator | dir); 660 idx = (idx_generator | dir);
426 idx_generator += 8; 661 idx_generator += 8;
427 if (idx == 0) 662 if (idx == 0)
428 idx = 8; 663 idx = 8;
429 for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { 664 list = xfrm_policy_byidx + idx_hash(idx);
430 if (p->index == idx) 665 found = 0;
666 hlist_for_each_entry(p, entry, list, byidx) {
667 if (p->index == idx) {
668 found = 1;
431 break; 669 break;
670 }
432 } 671 }
433 if (!p) 672 if (!found)
434 return idx; 673 return idx;
435 } 674 }
436} 675}
437 676
677static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
678{
679 u32 *p1 = (u32 *) s1;
680 u32 *p2 = (u32 *) s2;
681 int len = sizeof(struct xfrm_selector) / sizeof(u32);
682 int i;
683
684 for (i = 0; i < len; i++) {
685 if (p1[i] != p2[i])
686 return 1;
687 }
688
689 return 0;
690}
691
438int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) 692int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
439{ 693{
440 struct xfrm_policy *pol, **p; 694 struct xfrm_policy *pol;
441 struct xfrm_policy *delpol = NULL; 695 struct xfrm_policy *delpol;
442 struct xfrm_policy **newpos = NULL; 696 struct hlist_head *chain;
697 struct hlist_node *entry, *newpos, *last;
443 struct dst_entry *gc_list; 698 struct dst_entry *gc_list;
444 699
445 write_lock_bh(&xfrm_policy_lock); 700 write_lock_bh(&xfrm_policy_lock);
446 for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { 701 chain = policy_hash_bysel(&policy->selector, policy->family, dir);
447 if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && 702 delpol = NULL;
703 newpos = NULL;
704 last = NULL;
705 hlist_for_each_entry(pol, entry, chain, bydst) {
706 if (!delpol &&
707 pol->type == policy->type &&
708 !selector_cmp(&pol->selector, &policy->selector) &&
448 xfrm_sec_ctx_match(pol->security, policy->security)) { 709 xfrm_sec_ctx_match(pol->security, policy->security)) {
449 if (excl) { 710 if (excl) {
450 write_unlock_bh(&xfrm_policy_lock); 711 write_unlock_bh(&xfrm_policy_lock);
451 return -EEXIST; 712 return -EEXIST;
452 } 713 }
453 *p = pol->next;
454 delpol = pol; 714 delpol = pol;
455 if (policy->priority > pol->priority) 715 if (policy->priority > pol->priority)
456 continue; 716 continue;
457 } else if (policy->priority >= pol->priority) { 717 } else if (policy->priority >= pol->priority) {
458 p = &pol->next; 718 last = &pol->bydst;
459 continue; 719 continue;
460 } 720 }
461 if (!newpos) 721 if (!newpos)
462 newpos = p; 722 newpos = &pol->bydst;
463 if (delpol) 723 if (delpol)
464 break; 724 break;
465 p = &pol->next; 725 last = &pol->bydst;
466 } 726 }
727 if (!newpos)
728 newpos = last;
467 if (newpos) 729 if (newpos)
468 p = newpos; 730 hlist_add_after(newpos, &policy->bydst);
731 else
732 hlist_add_head(&policy->bydst, chain);
469 xfrm_pol_hold(policy); 733 xfrm_pol_hold(policy);
470 policy->next = *p; 734 xfrm_policy_count[dir]++;
471 *p = policy;
472 atomic_inc(&flow_cache_genid); 735 atomic_inc(&flow_cache_genid);
736 if (delpol) {
737 hlist_del(&delpol->bydst);
738 hlist_del(&delpol->byidx);
739 xfrm_policy_count[dir]--;
740 }
473 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); 741 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
742 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
474 policy->curlft.add_time = (unsigned long)xtime.tv_sec; 743 policy->curlft.add_time = (unsigned long)xtime.tv_sec;
475 policy->curlft.use_time = 0; 744 policy->curlft.use_time = 0;
476 if (!mod_timer(&policy->timer, jiffies + HZ)) 745 if (!mod_timer(&policy->timer, jiffies + HZ))
@@ -479,10 +748,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
479 748
480 if (delpol) 749 if (delpol)
481 xfrm_policy_kill(delpol); 750 xfrm_policy_kill(delpol);
751 else if (xfrm_bydst_should_resize(dir, NULL))
752 schedule_work(&xfrm_hash_work);
482 753
483 read_lock_bh(&xfrm_policy_lock); 754 read_lock_bh(&xfrm_policy_lock);
484 gc_list = NULL; 755 gc_list = NULL;
485 for (policy = policy->next; policy; policy = policy->next) { 756 entry = &policy->bydst;
757 hlist_for_each_entry_continue(policy, entry, bydst) {
486 struct dst_entry *dst; 758 struct dst_entry *dst;
487 759
488 write_lock(&policy->lock); 760 write_lock(&policy->lock);
@@ -515,67 +787,112 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
515 struct xfrm_selector *sel, 787 struct xfrm_selector *sel,
516 struct xfrm_sec_ctx *ctx, int delete) 788 struct xfrm_sec_ctx *ctx, int delete)
517{ 789{
518 struct xfrm_policy *pol, **p; 790 struct xfrm_policy *pol, *ret;
791 struct hlist_head *chain;
792 struct hlist_node *entry;
519 793
520 write_lock_bh(&xfrm_policy_lock); 794 write_lock_bh(&xfrm_policy_lock);
521 for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { 795 chain = policy_hash_bysel(sel, sel->family, dir);
522 if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && 796 ret = NULL;
523 (xfrm_sec_ctx_match(ctx, pol->security))) { 797 hlist_for_each_entry(pol, entry, chain, bydst) {
798 if (pol->type == type &&
799 !selector_cmp(sel, &pol->selector) &&
800 xfrm_sec_ctx_match(ctx, pol->security)) {
524 xfrm_pol_hold(pol); 801 xfrm_pol_hold(pol);
525 if (delete) 802 if (delete) {
526 *p = pol->next; 803 hlist_del(&pol->bydst);
804 hlist_del(&pol->byidx);
805 xfrm_policy_count[dir]--;
806 }
807 ret = pol;
527 break; 808 break;
528 } 809 }
529 } 810 }
530 write_unlock_bh(&xfrm_policy_lock); 811 write_unlock_bh(&xfrm_policy_lock);
531 812
532 if (pol && delete) { 813 if (ret && delete) {
533 atomic_inc(&flow_cache_genid); 814 atomic_inc(&flow_cache_genid);
534 xfrm_policy_kill(pol); 815 xfrm_policy_kill(ret);
535 } 816 }
536 return pol; 817 return ret;
537} 818}
538EXPORT_SYMBOL(xfrm_policy_bysel_ctx); 819EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
539 820
540struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) 821struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
541{ 822{
542 struct xfrm_policy *pol, **p; 823 struct xfrm_policy *pol, *ret;
824 struct hlist_head *chain;
825 struct hlist_node *entry;
543 826
544 write_lock_bh(&xfrm_policy_lock); 827 write_lock_bh(&xfrm_policy_lock);
545 for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { 828 chain = xfrm_policy_byidx + idx_hash(id);
546 if (pol->index == id) { 829 ret = NULL;
830 hlist_for_each_entry(pol, entry, chain, byidx) {
831 if (pol->type == type && pol->index == id) {
547 xfrm_pol_hold(pol); 832 xfrm_pol_hold(pol);
548 if (delete) 833 if (delete) {
549 *p = pol->next; 834 hlist_del(&pol->bydst);
835 hlist_del(&pol->byidx);
836 xfrm_policy_count[dir]--;
837 }
838 ret = pol;
550 break; 839 break;
551 } 840 }
552 } 841 }
553 write_unlock_bh(&xfrm_policy_lock); 842 write_unlock_bh(&xfrm_policy_lock);
554 843
555 if (pol && delete) { 844 if (ret && delete) {
556 atomic_inc(&flow_cache_genid); 845 atomic_inc(&flow_cache_genid);
557 xfrm_policy_kill(pol); 846 xfrm_policy_kill(ret);
558 } 847 }
559 return pol; 848 return ret;
560} 849}
561EXPORT_SYMBOL(xfrm_policy_byid); 850EXPORT_SYMBOL(xfrm_policy_byid);
562 851
563void xfrm_policy_flush(u8 type) 852void xfrm_policy_flush(u8 type)
564{ 853{
565 struct xfrm_policy *xp;
566 struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type);
567 int dir; 854 int dir;
568 855
569 write_lock_bh(&xfrm_policy_lock); 856 write_lock_bh(&xfrm_policy_lock);
570 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 857 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
571 while ((xp = p_list[dir]) != NULL) { 858 struct xfrm_policy *pol;
572 p_list[dir] = xp->next; 859 struct hlist_node *entry;
860 int i;
861
862 again1:
863 hlist_for_each_entry(pol, entry,
864 &xfrm_policy_inexact[dir], bydst) {
865 if (pol->type != type)
866 continue;
867 hlist_del(&pol->bydst);
868 hlist_del(&pol->byidx);
573 write_unlock_bh(&xfrm_policy_lock); 869 write_unlock_bh(&xfrm_policy_lock);
574 870
575 xfrm_policy_kill(xp); 871 xfrm_policy_kill(pol);
576 872
577 write_lock_bh(&xfrm_policy_lock); 873 write_lock_bh(&xfrm_policy_lock);
874 goto again1;
875 }
876
877 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
878 again2:
879 hlist_for_each_entry(pol, entry,
880 xfrm_policy_bydst[dir].table + i,
881 bydst) {
882 if (pol->type != type)
883 continue;
884 hlist_del(&pol->bydst);
885 hlist_del(&pol->byidx);
886 write_unlock_bh(&xfrm_policy_lock);
887
888 xfrm_policy_kill(pol);
889
890 write_lock_bh(&xfrm_policy_lock);
891 goto again2;
892 }
578 } 893 }
894
895 xfrm_policy_count[dir] = 0;
579 } 896 }
580 atomic_inc(&flow_cache_genid); 897 atomic_inc(&flow_cache_genid);
581 write_unlock_bh(&xfrm_policy_lock); 898 write_unlock_bh(&xfrm_policy_lock);
@@ -585,15 +902,27 @@ EXPORT_SYMBOL(xfrm_policy_flush);
585int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), 902int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
586 void *data) 903 void *data)
587{ 904{
588 struct xfrm_policy *xp; 905 struct xfrm_policy *pol;
589 int dir; 906 struct hlist_node *entry;
590 int count = 0; 907 int dir, count, error;
591 int error = 0;
592 908
593 read_lock_bh(&xfrm_policy_lock); 909 read_lock_bh(&xfrm_policy_lock);
910 count = 0;
594 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { 911 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
595 for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) 912 struct hlist_head *table = xfrm_policy_bydst[dir].table;
596 count++; 913 int i;
914
915 hlist_for_each_entry(pol, entry,
916 &xfrm_policy_inexact[dir], bydst) {
917 if (pol->type == type)
918 count++;
919 }
920 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
921 hlist_for_each_entry(pol, entry, table + i, bydst) {
922 if (pol->type == type)
923 count++;
924 }
925 }
597 } 926 }
598 927
599 if (count == 0) { 928 if (count == 0) {
@@ -602,13 +931,28 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*)
602 } 931 }
603 932
604 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { 933 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
605 for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { 934 struct hlist_head *table = xfrm_policy_bydst[dir].table;
606 error = func(xp, dir%XFRM_POLICY_MAX, --count, data); 935 int i;
936
937 hlist_for_each_entry(pol, entry,
938 &xfrm_policy_inexact[dir], bydst) {
939 if (pol->type != type)
940 continue;
941 error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
607 if (error) 942 if (error)
608 goto out; 943 goto out;
609 } 944 }
945 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
946 hlist_for_each_entry(pol, entry, table + i, bydst) {
947 if (pol->type != type)
948 continue;
949 error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
950 if (error)
951 goto out;
952 }
953 }
610 } 954 }
611 955 error = 0;
612out: 956out:
613 read_unlock_bh(&xfrm_policy_lock); 957 read_unlock_bh(&xfrm_policy_lock);
614 return error; 958 return error;
@@ -617,31 +961,61 @@ EXPORT_SYMBOL(xfrm_policy_walk);
617 961
618/* Find policy to apply to this flow. */ 962/* Find policy to apply to this flow. */
619 963
620static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, 964static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
621 u16 family, u8 dir) 965 u8 type, u16 family, int dir)
622{ 966{
623 struct xfrm_policy *pol; 967 struct xfrm_selector *sel = &pol->selector;
968 int match;
624 969
625 read_lock_bh(&xfrm_policy_lock); 970 if (pol->family != family ||
626 for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { 971 pol->type != type)
627 struct xfrm_selector *sel = &pol->selector; 972 return 0;
628 int match;
629 973
630 if (pol->family != family) 974 match = xfrm_selector_match(sel, fl, family);
631 continue; 975 if (match) {
976 if (!security_xfrm_policy_lookup(pol, fl->secid, dir))
977 return 1;
978 }
979
980 return 0;
981}
632 982
633 match = xfrm_selector_match(sel, fl, family); 983static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
984 u16 family, u8 dir)
985{
986 struct xfrm_policy *pol, *ret;
987 xfrm_address_t *daddr, *saddr;
988 struct hlist_node *entry;
989 struct hlist_head *chain;
634 990
635 if (match) { 991 daddr = xfrm_flowi_daddr(fl, family);
636 if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { 992 saddr = xfrm_flowi_saddr(fl, family);
993 if (unlikely(!daddr || !saddr))
994 return NULL;
995
996 read_lock_bh(&xfrm_policy_lock);
997 chain = policy_hash_direct(daddr, saddr, family, dir);
998 ret = NULL;
999 hlist_for_each_entry(pol, entry, chain, bydst) {
1000 if (xfrm_policy_match(pol, fl, type, family, dir)) {
1001 xfrm_pol_hold(pol);
1002 ret = pol;
1003 break;
1004 }
1005 }
1006 if (!ret) {
1007 chain = &xfrm_policy_inexact[dir];
1008 hlist_for_each_entry(pol, entry, chain, bydst) {
1009 if (xfrm_policy_match(pol, fl, type, family, dir)) {
637 xfrm_pol_hold(pol); 1010 xfrm_pol_hold(pol);
1011 ret = pol;
638 break; 1012 break;
639 } 1013 }
640 } 1014 }
641 } 1015 }
642 read_unlock_bh(&xfrm_policy_lock); 1016 read_unlock_bh(&xfrm_policy_lock);
643 1017
644 return pol; 1018 return ret;
645} 1019}
646 1020
647static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, 1021static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
@@ -657,7 +1031,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
657 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); 1031 pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
658 1032
659#ifdef CONFIG_XFRM_SUB_POLICY 1033#ifdef CONFIG_XFRM_SUB_POLICY
660 end: 1034end:
661#endif 1035#endif
662 if ((*objp = (void *) pol) != NULL) 1036 if ((*objp = (void *) pol) != NULL)
663 *obj_refp = &pol->refcnt; 1037 *obj_refp = &pol->refcnt;
@@ -704,26 +1078,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc
704 1078
705static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) 1079static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
706{ 1080{
707 struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); 1081 struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1082 pol->family, dir);
708 1083
709 pol->next = p_list[dir]; 1084 hlist_add_head(&pol->bydst, chain);
710 p_list[dir] = pol; 1085 hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1086 xfrm_policy_count[dir]++;
711 xfrm_pol_hold(pol); 1087 xfrm_pol_hold(pol);
1088
1089 if (xfrm_bydst_should_resize(dir, NULL))
1090 schedule_work(&xfrm_hash_work);
712} 1091}
713 1092
714static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, 1093static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
715 int dir) 1094 int dir)
716{ 1095{
717 struct xfrm_policy **polp; 1096 if (hlist_unhashed(&pol->bydst))
1097 return NULL;
718 1098
719 for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); 1099 hlist_del(&pol->bydst);
720 *polp != NULL; polp = &(*polp)->next) { 1100 hlist_del(&pol->byidx);
721 if (*polp == pol) { 1101 xfrm_policy_count[dir]--;
722 *polp = pol->next; 1102
723 return pol; 1103 return pol;
724 }
725 }
726 return NULL;
727} 1104}
728 1105
729int xfrm_policy_delete(struct xfrm_policy *pol, int dir) 1106int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
@@ -968,7 +1345,8 @@ restart:
968 1345
969 if (!policy) { 1346 if (!policy) {
970 /* To accelerate a bit... */ 1347 /* To accelerate a bit... */
971 if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) 1348 if ((dst_orig->flags & DST_NOXFRM) ||
1349 !xfrm_policy_count[XFRM_POLICY_OUT])
972 return 0; 1350 return 0;
973 1351
974 policy = flow_cache_lookup(fl, dst_orig->ops->family, 1352 policy = flow_cache_lookup(fl, dst_orig->ops->family,
@@ -1413,50 +1791,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
1413 return dst; 1791 return dst;
1414} 1792}
1415 1793
1794static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
1795{
1796 struct dst_entry *dst, **dstp;
1797
1798 write_lock(&pol->lock);
1799 dstp = &pol->bundles;
1800 while ((dst=*dstp) != NULL) {
1801 if (func(dst)) {
1802 *dstp = dst->next;
1803 dst->next = *gc_list_p;
1804 *gc_list_p = dst;
1805 } else {
1806 dstp = &dst->next;
1807 }
1808 }
1809 write_unlock(&pol->lock);
1810}
1811
1416static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) 1812static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
1417{ 1813{
1418 int i; 1814 struct dst_entry *gc_list = NULL;
1419 struct xfrm_policy *pol; 1815 int dir;
1420 struct dst_entry *dst, **dstp, *gc_list = NULL;
1421 1816
1422 read_lock_bh(&xfrm_policy_lock); 1817 read_lock_bh(&xfrm_policy_lock);
1423 for (i=0; i<2*XFRM_POLICY_MAX; i++) { 1818 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
1424#ifdef CONFIG_XFRM_SUB_POLICY 1819 struct xfrm_policy *pol;
1425 for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { 1820 struct hlist_node *entry;
1426 write_lock(&pol->lock); 1821 struct hlist_head *table;
1427 dstp = &pol->bundles; 1822 int i;
1428 while ((dst=*dstp) != NULL) {
1429 if (func(dst)) {
1430 *dstp = dst->next;
1431 dst->next = gc_list;
1432 gc_list = dst;
1433 } else {
1434 dstp = &dst->next;
1435 }
1436 }
1437 write_unlock(&pol->lock);
1438 }
1439 1823
1440#endif 1824 hlist_for_each_entry(pol, entry,
1441 for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { 1825 &xfrm_policy_inexact[dir], bydst)
1442 write_lock(&pol->lock); 1826 prune_one_bundle(pol, func, &gc_list);
1443 dstp = &pol->bundles; 1827
1444 while ((dst=*dstp) != NULL) { 1828 table = xfrm_policy_bydst[dir].table;
1445 if (func(dst)) { 1829 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
1446 *dstp = dst->next; 1830 hlist_for_each_entry(pol, entry, table + i, bydst)
1447 dst->next = gc_list; 1831 prune_one_bundle(pol, func, &gc_list);
1448 gc_list = dst;
1449 } else {
1450 dstp = &dst->next;
1451 }
1452 }
1453 write_unlock(&pol->lock);
1454 } 1832 }
1455 } 1833 }
1456 read_unlock_bh(&xfrm_policy_lock); 1834 read_unlock_bh(&xfrm_policy_lock);
1457 1835
1458 while (gc_list) { 1836 while (gc_list) {
1459 dst = gc_list; 1837 struct dst_entry *dst = gc_list;
1460 gc_list = dst->next; 1838 gc_list = dst->next;
1461 dst_free(dst); 1839 dst_free(dst);
1462 } 1840 }
@@ -1680,6 +2058,9 @@ static struct notifier_block xfrm_dev_notifier = {
1680 2058
1681static void __init xfrm_policy_init(void) 2059static void __init xfrm_policy_init(void)
1682{ 2060{
2061 unsigned int hmask, sz;
2062 int dir;
2063
1683 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", 2064 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
1684 sizeof(struct xfrm_dst), 2065 sizeof(struct xfrm_dst),
1685 0, SLAB_HWCACHE_ALIGN, 2066 0, SLAB_HWCACHE_ALIGN,
@@ -1687,6 +2068,26 @@ static void __init xfrm_policy_init(void)
1687 if (!xfrm_dst_cache) 2068 if (!xfrm_dst_cache)
1688 panic("XFRM: failed to allocate xfrm_dst_cache\n"); 2069 panic("XFRM: failed to allocate xfrm_dst_cache\n");
1689 2070
2071 hmask = 8 - 1;
2072 sz = (hmask+1) * sizeof(struct hlist_head);
2073
2074 xfrm_policy_byidx = xfrm_policy_hash_alloc(sz);
2075 xfrm_idx_hmask = hmask;
2076 if (!xfrm_policy_byidx)
2077 panic("XFRM: failed to allocate byidx hash\n");
2078
2079 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2080 struct xfrm_policy_hash *htab;
2081
2082 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2083
2084 htab = &xfrm_policy_bydst[dir];
2085 htab->table = xfrm_policy_hash_alloc(sz);
2086 htab->hmask = hmask;
2087 if (!htab->table)
2088 panic("XFRM: failed to allocate bydst hash\n");
2089 }
2090
1690 INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); 2091 INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
1691 register_netdevice_notifier(&xfrm_dev_notifier); 2092 register_netdevice_notifier(&xfrm_dev_notifier);
1692} 2093}