diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r-- | net/ipv4/fib_trie.c | 202 |
1 files changed, 168 insertions, 34 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b56e88edf1b3..4be234c7d8c3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -43,7 +43,7 @@ | |||
43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
44 | */ | 44 | */ |
45 | 45 | ||
46 | #define VERSION "0.324" | 46 | #define VERSION "0.325" |
47 | 47 | ||
48 | #include <linux/config.h> | 48 | #include <linux/config.h> |
49 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
@@ -136,6 +136,7 @@ struct trie_use_stats { | |||
136 | unsigned int semantic_match_passed; | 136 | unsigned int semantic_match_passed; |
137 | unsigned int semantic_match_miss; | 137 | unsigned int semantic_match_miss; |
138 | unsigned int null_node_hit; | 138 | unsigned int null_node_hit; |
139 | unsigned int resize_node_skipped; | ||
139 | }; | 140 | }; |
140 | #endif | 141 | #endif |
141 | 142 | ||
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | |||
164 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); | 165 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); |
165 | static int tnode_child_length(struct tnode *tn); | 166 | static int tnode_child_length(struct tnode *tn); |
166 | static struct node *resize(struct trie *t, struct tnode *tn); | 167 | static struct node *resize(struct trie *t, struct tnode *tn); |
167 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 168 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); |
168 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 169 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); |
169 | static void tnode_free(struct tnode *tn); | 170 | static void tnode_free(struct tnode *tn); |
170 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); | 171 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); |
171 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); | 172 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); |
@@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li) | |||
358 | kfree(li); | 359 | kfree(li); |
359 | } | 360 | } |
360 | 361 | ||
362 | static struct tnode *tnode_alloc(unsigned int size) | ||
363 | { | ||
364 | if (size <= PAGE_SIZE) { | ||
365 | return kmalloc(size, GFP_KERNEL); | ||
366 | } else { | ||
367 | return (struct tnode *) | ||
368 | __get_free_pages(GFP_KERNEL, get_order(size)); | ||
369 | } | ||
370 | } | ||
371 | |||
372 | static void __tnode_free(struct tnode *tn) | ||
373 | { | ||
374 | unsigned int size = sizeof(struct tnode) + | ||
375 | (1<<tn->bits) * sizeof(struct node *); | ||
376 | |||
377 | if (size <= PAGE_SIZE) | ||
378 | kfree(tn); | ||
379 | else | ||
380 | free_pages((unsigned long)tn, get_order(size)); | ||
381 | } | ||
382 | |||
361 | static struct tnode* tnode_new(t_key key, int pos, int bits) | 383 | static struct tnode* tnode_new(t_key key, int pos, int bits) |
362 | { | 384 | { |
363 | int nchildren = 1<<bits; | 385 | int nchildren = 1<<bits; |
364 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); | 386 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); |
365 | struct tnode *tn = kmalloc(sz, GFP_KERNEL); | 387 | struct tnode *tn = tnode_alloc(sz); |
366 | 388 | ||
367 | if(tn) { | 389 | if(tn) { |
368 | memset(tn, 0, sz); | 390 | memset(tn, 0, sz); |
@@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn) | |||
390 | printk("FL %p \n", tn); | 412 | printk("FL %p \n", tn); |
391 | } | 413 | } |
392 | else if(IS_TNODE(tn)) { | 414 | else if(IS_TNODE(tn)) { |
393 | kfree(tn); | 415 | __tnode_free(tn); |
394 | if(trie_debug > 0 ) | 416 | if(trie_debug > 0 ) |
395 | printk("FT %p \n", tn); | 417 | printk("FT %p \n", tn); |
396 | } | 418 | } |
@@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w | |||
460 | static struct node *resize(struct trie *t, struct tnode *tn) | 482 | static struct node *resize(struct trie *t, struct tnode *tn) |
461 | { | 483 | { |
462 | int i; | 484 | int i; |
485 | int err = 0; | ||
463 | 486 | ||
464 | if (!tn) | 487 | if (!tn) |
465 | return NULL; | 488 | return NULL; |
@@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
556 | */ | 579 | */ |
557 | 580 | ||
558 | check_tnode(tn); | 581 | check_tnode(tn); |
559 | 582 | ||
583 | err = 0; | ||
560 | while ((tn->full_children > 0 && | 584 | while ((tn->full_children > 0 && |
561 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= | 585 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= |
562 | inflate_threshold * tnode_child_length(tn))) { | 586 | inflate_threshold * tnode_child_length(tn))) { |
563 | 587 | ||
564 | tn = inflate(t, tn); | 588 | tn = inflate(t, tn, &err); |
589 | |||
590 | if(err) { | ||
591 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
592 | t->stats.resize_node_skipped++; | ||
593 | #endif | ||
594 | break; | ||
595 | } | ||
565 | } | 596 | } |
566 | 597 | ||
567 | check_tnode(tn); | 598 | check_tnode(tn); |
@@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
570 | * Halve as long as the number of empty children in this | 601 | * Halve as long as the number of empty children in this |
571 | * node is above threshold. | 602 | * node is above threshold. |
572 | */ | 603 | */ |
604 | |||
605 | err = 0; | ||
573 | while (tn->bits > 1 && | 606 | while (tn->bits > 1 && |
574 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 607 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
575 | halve_threshold * tnode_child_length(tn)) | 608 | halve_threshold * tnode_child_length(tn)) { |
609 | |||
610 | tn = halve(t, tn, &err); | ||
611 | |||
612 | if(err) { | ||
613 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
614 | t->stats.resize_node_skipped++; | ||
615 | #endif | ||
616 | break; | ||
617 | } | ||
618 | } | ||
576 | 619 | ||
577 | tn = halve(t, tn); | ||
578 | 620 | ||
579 | /* Only one child remains */ | 621 | /* Only one child remains */ |
580 | 622 | ||
@@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
599 | return (struct node *) tn; | 641 | return (struct node *) tn; |
600 | } | 642 | } |
601 | 643 | ||
602 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 644 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) |
603 | { | 645 | { |
604 | struct tnode *inode; | 646 | struct tnode *inode; |
605 | struct tnode *oldtnode = tn; | 647 | struct tnode *oldtnode = tn; |
@@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
611 | 653 | ||
612 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); | 654 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); |
613 | 655 | ||
614 | if (!tn) | 656 | if (!tn) { |
615 | trie_bug("tnode_new failed"); | 657 | *err = -ENOMEM; |
658 | return oldtnode; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Preallocate and store tnodes before the actual work so we | ||
663 | * don't get into an inconsistent state if memory allocation | ||
664 | * fails. In case of failure we return the oldnode and inflate | ||
665 | * of tnode is ignored. | ||
666 | */ | ||
667 | |||
668 | for(i = 0; i < olen; i++) { | ||
669 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); | ||
670 | |||
671 | if (inode && | ||
672 | IS_TNODE(inode) && | ||
673 | inode->pos == oldtnode->pos + oldtnode->bits && | ||
674 | inode->bits > 1) { | ||
675 | struct tnode *left, *right; | ||
676 | |||
677 | t_key m = TKEY_GET_MASK(inode->pos, 1); | ||
678 | |||
679 | left = tnode_new(inode->key&(~m), inode->pos + 1, | ||
680 | inode->bits - 1); | ||
681 | |||
682 | if(!left) { | ||
683 | *err = -ENOMEM; | ||
684 | break; | ||
685 | } | ||
686 | |||
687 | right = tnode_new(inode->key|m, inode->pos + 1, | ||
688 | inode->bits - 1); | ||
689 | |||
690 | if(!right) { | ||
691 | *err = -ENOMEM; | ||
692 | break; | ||
693 | } | ||
694 | |||
695 | put_child(t, tn, 2*i, (struct node *) left); | ||
696 | put_child(t, tn, 2*i+1, (struct node *) right); | ||
697 | } | ||
698 | } | ||
699 | |||
700 | if(*err) { | ||
701 | int size = tnode_child_length(tn); | ||
702 | int j; | ||
703 | |||
704 | for(j = 0; j < size; j++) | ||
705 | if( tn->child[j]) | ||
706 | tnode_free((struct tnode *)tn->child[j]); | ||
707 | |||
708 | tnode_free(tn); | ||
709 | |||
710 | *err = -ENOMEM; | ||
711 | return oldtnode; | ||
712 | } | ||
616 | 713 | ||
617 | for(i = 0; i < olen; i++) { | 714 | for(i = 0; i < olen; i++) { |
618 | struct node *node = tnode_get_child(oldtnode, i); | 715 | struct node *node = tnode_get_child(oldtnode, i); |
@@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
625 | 722 | ||
626 | if(IS_LEAF(node) || ((struct tnode *) node)->pos > | 723 | if(IS_LEAF(node) || ((struct tnode *) node)->pos > |
627 | tn->pos + tn->bits - 1) { | 724 | tn->pos + tn->bits - 1) { |
628 | if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, | 725 | if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, |
629 | 1) == 0) | 726 | 1) == 0) |
630 | put_child(t, tn, 2*i, node); | 727 | put_child(t, tn, 2*i, node); |
631 | else | 728 | else |
@@ -665,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
665 | * the position (inode->pos) | 762 | * the position (inode->pos) |
666 | */ | 763 | */ |
667 | 764 | ||
668 | t_key m = TKEY_GET_MASK(inode->pos, 1); | ||
669 | |||
670 | /* Use the old key, but set the new significant | 765 | /* Use the old key, but set the new significant |
671 | * bit to zero. | 766 | * bit to zero. |
672 | */ | 767 | */ |
673 | left = tnode_new(inode->key&(~m), inode->pos + 1, | ||
674 | inode->bits - 1); | ||
675 | 768 | ||
676 | if(!left) | 769 | left = (struct tnode *) tnode_get_child(tn, 2*i); |
677 | trie_bug("tnode_new failed"); | 770 | put_child(t, tn, 2*i, NULL); |
678 | 771 | ||
679 | 772 | if(!left) | |
680 | /* Use the old key, but set the new significant | 773 | BUG(); |
681 | * bit to one. | 774 | |
682 | */ | 775 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); |
683 | right = tnode_new(inode->key|m, inode->pos + 1, | 776 | put_child(t, tn, 2*i+1, NULL); |
684 | inode->bits - 1); | 777 | |
778 | if(!right) | ||
779 | BUG(); | ||
685 | 780 | ||
686 | if(!right) | ||
687 | trie_bug("tnode_new failed"); | ||
688 | |||
689 | size = tnode_child_length(left); | 781 | size = tnode_child_length(left); |
690 | for(j = 0; j < size; j++) { | 782 | for(j = 0; j < size; j++) { |
691 | put_child(t, left, j, inode->child[j]); | 783 | put_child(t, left, j, inode->child[j]); |
@@ -701,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
701 | return tn; | 793 | return tn; |
702 | } | 794 | } |
703 | 795 | ||
704 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 796 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) |
705 | { | 797 | { |
706 | struct tnode *oldtnode = tn; | 798 | struct tnode *oldtnode = tn; |
707 | struct node *left, *right; | 799 | struct node *left, *right; |
@@ -712,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
712 | 804 | ||
713 | tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); | 805 | tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); |
714 | 806 | ||
715 | if(!tn) | 807 | if (!tn) { |
716 | trie_bug("tnode_new failed"); | 808 | *err = -ENOMEM; |
809 | return oldtnode; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * Preallocate and store tnodes before the actual work so we | ||
814 | * don't get into an inconsistent state if memory allocation | ||
815 | * fails. In case of failure we return the oldnode and halve | ||
816 | * of tnode is ignored. | ||
817 | */ | ||
818 | |||
819 | for(i = 0; i < olen; i += 2) { | ||
820 | left = tnode_get_child(oldtnode, i); | ||
821 | right = tnode_get_child(oldtnode, i+1); | ||
822 | |||
823 | /* Two nonempty children */ | ||
824 | if( left && right) { | ||
825 | struct tnode *newBinNode = | ||
826 | tnode_new(left->key, tn->pos + tn->bits, 1); | ||
827 | |||
828 | if(!newBinNode) { | ||
829 | *err = -ENOMEM; | ||
830 | break; | ||
831 | } | ||
832 | put_child(t, tn, i/2, (struct node *)newBinNode); | ||
833 | } | ||
834 | } | ||
835 | |||
836 | if(*err) { | ||
837 | int size = tnode_child_length(tn); | ||
838 | int j; | ||
839 | |||
840 | for(j = 0; j < size; j++) | ||
841 | if( tn->child[j]) | ||
842 | tnode_free((struct tnode *)tn->child[j]); | ||
843 | |||
844 | tnode_free(tn); | ||
845 | |||
846 | *err = -ENOMEM; | ||
847 | return oldtnode; | ||
848 | } | ||
717 | 849 | ||
718 | for(i = 0; i < olen; i += 2) { | 850 | for(i = 0; i < olen; i += 2) { |
719 | left = tnode_get_child(oldtnode, i); | 851 | left = tnode_get_child(oldtnode, i); |
@@ -730,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
730 | /* Two nonempty children */ | 862 | /* Two nonempty children */ |
731 | else { | 863 | else { |
732 | struct tnode *newBinNode = | 864 | struct tnode *newBinNode = |
733 | tnode_new(left->key, tn->pos + tn->bits, 1); | 865 | (struct tnode *) tnode_get_child(tn, i/2); |
866 | put_child(t, tn, i/2, NULL); | ||
734 | 867 | ||
735 | if(!newBinNode) | 868 | if(!newBinNode) |
736 | trie_bug("tnode_new failed"); | 869 | BUG(); |
737 | 870 | ||
738 | put_child(t, newBinNode, 0, left); | 871 | put_child(t, newBinNode, 0, left); |
739 | put_child(t, newBinNode, 1, right); | 872 | put_child(t, newBinNode, 1, right); |
@@ -2301,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) | |||
2301 | seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); | 2434 | seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); |
2302 | seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); | 2435 | seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); |
2303 | seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); | 2436 | seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); |
2437 | seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); | ||
2304 | #ifdef CLEAR_STATS | 2438 | #ifdef CLEAR_STATS |
2305 | memset(&(t->stats), 0, sizeof(t->stats)); | 2439 | memset(&(t->stats), 0, sizeof(t->stats)); |
2306 | #endif | 2440 | #endif |