aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_trie.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r--net/ipv4/fib_trie.c256
1 files changed, 206 insertions, 50 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 0671569ee6f0..4be234c7d8c3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.323" 46#define VERSION "0.325"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -136,6 +136,7 @@ struct trie_use_stats {
136 unsigned int semantic_match_passed; 136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss; 137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit; 138 unsigned int null_node_hit;
139 unsigned int resize_node_skipped;
139}; 140};
140#endif 141#endif
141 142
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 165static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn); 166static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn); 167static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn); 168static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
168static struct tnode *halve(struct trie *t, struct tnode *tn); 169static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
169static void tnode_free(struct tnode *tn); 170static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t); 171static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); 172extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
@@ -341,8 +342,10 @@ static struct leaf *leaf_new(void)
341static struct leaf_info *leaf_info_new(int plen) 342static struct leaf_info *leaf_info_new(int plen)
342{ 343{
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL); 344 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
344 li->plen = plen; 345 if(li) {
345 INIT_LIST_HEAD(&li->falh); 346 li->plen = plen;
347 INIT_LIST_HEAD(&li->falh);
348 }
346 return li; 349 return li;
347} 350}
348 351
@@ -356,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
356 kfree(li); 359 kfree(li);
357} 360}
358 361
362static struct tnode *tnode_alloc(unsigned int size)
363{
364 if (size <= PAGE_SIZE) {
365 return kmalloc(size, GFP_KERNEL);
366 } else {
367 return (struct tnode *)
368 __get_free_pages(GFP_KERNEL, get_order(size));
369 }
370}
371
372static void __tnode_free(struct tnode *tn)
373{
374 unsigned int size = sizeof(struct tnode) +
375 (1<<tn->bits) * sizeof(struct node *);
376
377 if (size <= PAGE_SIZE)
378 kfree(tn);
379 else
380 free_pages((unsigned long)tn, get_order(size));
381}
382
359static struct tnode* tnode_new(t_key key, int pos, int bits) 383static struct tnode* tnode_new(t_key key, int pos, int bits)
360{ 384{
361 int nchildren = 1<<bits; 385 int nchildren = 1<<bits;
362 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
363 struct tnode *tn = kmalloc(sz, GFP_KERNEL); 387 struct tnode *tn = tnode_alloc(sz);
364 388
365 if(tn) { 389 if(tn) {
366 memset(tn, 0, sz); 390 memset(tn, 0, sz);
@@ -388,7 +412,7 @@ static void tnode_free(struct tnode *tn)
388 printk("FL %p \n", tn); 412 printk("FL %p \n", tn);
389 } 413 }
390 else if(IS_TNODE(tn)) { 414 else if(IS_TNODE(tn)) {
391 kfree(tn); 415 __tnode_free(tn);
392 if(trie_debug > 0 ) 416 if(trie_debug > 0 )
393 printk("FT %p \n", tn); 417 printk("FT %p \n", tn);
394 } 418 }
@@ -458,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
458static struct node *resize(struct trie *t, struct tnode *tn) 482static struct node *resize(struct trie *t, struct tnode *tn)
459{ 483{
460 int i; 484 int i;
485 int err = 0;
461 486
462 if (!tn) 487 if (!tn)
463 return NULL; 488 return NULL;
@@ -554,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
554 */ 579 */
555 580
556 check_tnode(tn); 581 check_tnode(tn);
557 582
583 err = 0;
558 while ((tn->full_children > 0 && 584 while ((tn->full_children > 0 &&
559 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 585 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
560 inflate_threshold * tnode_child_length(tn))) { 586 inflate_threshold * tnode_child_length(tn))) {
561 587
562 tn = inflate(t, tn); 588 tn = inflate(t, tn, &err);
589
590 if(err) {
591#ifdef CONFIG_IP_FIB_TRIE_STATS
592 t->stats.resize_node_skipped++;
593#endif
594 break;
595 }
563 } 596 }
564 597
565 check_tnode(tn); 598 check_tnode(tn);
@@ -568,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
568 * Halve as long as the number of empty children in this 601 * Halve as long as the number of empty children in this
569 * node is above threshold. 602 * node is above threshold.
570 */ 603 */
604
605 err = 0;
571 while (tn->bits > 1 && 606 while (tn->bits > 1 &&
572 100 * (tnode_child_length(tn) - tn->empty_children) < 607 100 * (tnode_child_length(tn) - tn->empty_children) <
573 halve_threshold * tnode_child_length(tn)) 608 halve_threshold * tnode_child_length(tn)) {
609
610 tn = halve(t, tn, &err);
611
612 if(err) {
613#ifdef CONFIG_IP_FIB_TRIE_STATS
614 t->stats.resize_node_skipped++;
615#endif
616 break;
617 }
618 }
574 619
575 tn = halve(t, tn);
576 620
577 /* Only one child remains */ 621 /* Only one child remains */
578 622
@@ -597,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
597 return (struct node *) tn; 641 return (struct node *) tn;
598} 642}
599 643
600static struct tnode *inflate(struct trie *t, struct tnode *tn) 644static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
601{ 645{
602 struct tnode *inode; 646 struct tnode *inode;
603 struct tnode *oldtnode = tn; 647 struct tnode *oldtnode = tn;
@@ -609,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
609 653
610 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 654 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
611 655
612 if (!tn) 656 if (!tn) {
613 trie_bug("tnode_new failed"); 657 *err = -ENOMEM;
658 return oldtnode;
659 }
660
661 /*
662 * Preallocate and store tnodes before the actual work so we
663 * don't get into an inconsistent state if memory allocation
664 * fails. In case of failure we return the oldnode and inflate
665 * of tnode is ignored.
666 */
667
668 for(i = 0; i < olen; i++) {
669 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
670
671 if (inode &&
672 IS_TNODE(inode) &&
673 inode->pos == oldtnode->pos + oldtnode->bits &&
674 inode->bits > 1) {
675 struct tnode *left, *right;
676
677 t_key m = TKEY_GET_MASK(inode->pos, 1);
678
679 left = tnode_new(inode->key&(~m), inode->pos + 1,
680 inode->bits - 1);
681
682 if(!left) {
683 *err = -ENOMEM;
684 break;
685 }
686
687 right = tnode_new(inode->key|m, inode->pos + 1,
688 inode->bits - 1);
689
690 if(!right) {
691 *err = -ENOMEM;
692 break;
693 }
694
695 put_child(t, tn, 2*i, (struct node *) left);
696 put_child(t, tn, 2*i+1, (struct node *) right);
697 }
698 }
699
700 if(*err) {
701 int size = tnode_child_length(tn);
702 int j;
703
704 for(j = 0; j < size; j++)
705 if( tn->child[j])
706 tnode_free((struct tnode *)tn->child[j]);
707
708 tnode_free(tn);
709
710 *err = -ENOMEM;
711 return oldtnode;
712 }
614 713
615 for(i = 0; i < olen; i++) { 714 for(i = 0; i < olen; i++) {
616 struct node *node = tnode_get_child(oldtnode, i); 715 struct node *node = tnode_get_child(oldtnode, i);
@@ -623,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
623 722
624 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 723 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
625 tn->pos + tn->bits - 1) { 724 tn->pos + tn->bits - 1) {
626 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, 725 if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
627 1) == 0) 726 1) == 0)
628 put_child(t, tn, 2*i, node); 727 put_child(t, tn, 2*i, node);
629 else 728 else
@@ -663,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
663 * the position (inode->pos) 762 * the position (inode->pos)
664 */ 763 */
665 764
666 t_key m = TKEY_GET_MASK(inode->pos, 1);
667
668 /* Use the old key, but set the new significant 765 /* Use the old key, but set the new significant
669 * bit to zero. 766 * bit to zero.
670 */ 767 */
671 left = tnode_new(inode->key&(~m), inode->pos + 1,
672 inode->bits - 1);
673 768
674 if(!left) 769 left = (struct tnode *) tnode_get_child(tn, 2*i);
675 trie_bug("tnode_new failed"); 770 put_child(t, tn, 2*i, NULL);
676 771
677 772 if(!left)
678 /* Use the old key, but set the new significant 773 BUG();
679 * bit to one. 774
680 */ 775 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
681 right = tnode_new(inode->key|m, inode->pos + 1, 776 put_child(t, tn, 2*i+1, NULL);
682 inode->bits - 1); 777
778 if(!right)
779 BUG();
683 780
684 if(!right)
685 trie_bug("tnode_new failed");
686
687 size = tnode_child_length(left); 781 size = tnode_child_length(left);
688 for(j = 0; j < size; j++) { 782 for(j = 0; j < size; j++) {
689 put_child(t, left, j, inode->child[j]); 783 put_child(t, left, j, inode->child[j]);
@@ -699,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
699 return tn; 793 return tn;
700} 794}
701 795
702static struct tnode *halve(struct trie *t, struct tnode *tn) 796static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
703{ 797{
704 struct tnode *oldtnode = tn; 798 struct tnode *oldtnode = tn;
705 struct node *left, *right; 799 struct node *left, *right;
@@ -710,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
710 804
711 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 805 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
712 806
713 if(!tn) 807 if (!tn) {
714 trie_bug("tnode_new failed"); 808 *err = -ENOMEM;
809 return oldtnode;
810 }
811
812 /*
813 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve
816 * of tnode is ignored.
817 */
818
819 for(i = 0; i < olen; i += 2) {
820 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1);
822
823 /* Two nonempty children */
824 if( left && right) {
825 struct tnode *newBinNode =
826 tnode_new(left->key, tn->pos + tn->bits, 1);
827
828 if(!newBinNode) {
829 *err = -ENOMEM;
830 break;
831 }
832 put_child(t, tn, i/2, (struct node *)newBinNode);
833 }
834 }
835
836 if(*err) {
837 int size = tnode_child_length(tn);
838 int j;
839
840 for(j = 0; j < size; j++)
841 if( tn->child[j])
842 tnode_free((struct tnode *)tn->child[j]);
843
844 tnode_free(tn);
845
846 *err = -ENOMEM;
847 return oldtnode;
848 }
715 849
716 for(i = 0; i < olen; i += 2) { 850 for(i = 0; i < olen; i += 2) {
717 left = tnode_get_child(oldtnode, i); 851 left = tnode_get_child(oldtnode, i);
@@ -728,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
728 /* Two nonempty children */ 862 /* Two nonempty children */
729 else { 863 else {
730 struct tnode *newBinNode = 864 struct tnode *newBinNode =
731 tnode_new(left->key, tn->pos + tn->bits, 1); 865 (struct tnode *) tnode_get_child(tn, i/2);
866 put_child(t, tn, i/2, NULL);
732 867
733 if(!newBinNode) 868 if(!newBinNode)
734 trie_bug("tnode_new failed"); 869 BUG();
735 870
736 put_child(t, newBinNode, 0, left); 871 put_child(t, newBinNode, 0, left);
737 put_child(t, newBinNode, 1, right); 872 put_child(t, newBinNode, 1, right);
@@ -879,8 +1014,8 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
879 return (struct node*) tn; 1014 return (struct node*) tn;
880} 1015}
881 1016
882static struct list_head * 1017static struct list_head *
883fib_insert_node(struct trie *t, u32 key, int plen) 1018fib_insert_node(struct trie *t, int *err, u32 key, int plen)
884{ 1019{
885 int pos, newpos; 1020 int pos, newpos;
886 struct tnode *tp = NULL, *tn = NULL; 1021 struct tnode *tp = NULL, *tn = NULL;
@@ -940,7 +1075,6 @@ fib_insert_node(struct trie *t, u32 key, int plen)
940 if(tp && IS_LEAF(tp)) 1075 if(tp && IS_LEAF(tp))
941 BUG(); 1076 BUG();
942 1077
943 t->revision++;
944 1078
945 /* Case 1: n is a leaf. Compare prefixes */ 1079 /* Case 1: n is a leaf. Compare prefixes */
946 1080
@@ -949,8 +1083,10 @@ fib_insert_node(struct trie *t, u32 key, int plen)
949 1083
950 li = leaf_info_new(plen); 1084 li = leaf_info_new(plen);
951 1085
952 if(! li) 1086 if(! li) {
953 BUG(); 1087 *err = -ENOMEM;
1088 goto err;
1089 }
954 1090
955 fa_head = &li->falh; 1091 fa_head = &li->falh;
956 insert_leaf_info(&l->list, li); 1092 insert_leaf_info(&l->list, li);
@@ -959,14 +1095,19 @@ fib_insert_node(struct trie *t, u32 key, int plen)
959 t->size++; 1095 t->size++;
960 l = leaf_new(); 1096 l = leaf_new();
961 1097
962 if(! l) 1098 if(! l) {
963 BUG(); 1099 *err = -ENOMEM;
1100 goto err;
1101 }
964 1102
965 l->key = key; 1103 l->key = key;
966 li = leaf_info_new(plen); 1104 li = leaf_info_new(plen);
967 1105
968 if(! li) 1106 if(! li) {
969 BUG(); 1107 tnode_free((struct tnode *) l);
1108 *err = -ENOMEM;
1109 goto err;
1110 }
970 1111
971 fa_head = &li->falh; 1112 fa_head = &li->falh;
972 insert_leaf_info(&l->list, li); 1113 insert_leaf_info(&l->list, li);
@@ -1003,9 +1144,14 @@ fib_insert_node(struct trie *t, u32 key, int plen)
1003 newpos = 0; 1144 newpos = 0;
1004 tn = tnode_new(key, newpos, 1); /* First tnode */ 1145 tn = tnode_new(key, newpos, 1); /* First tnode */
1005 } 1146 }
1006 if(!tn)
1007 trie_bug("tnode_pfx_new failed");
1008 1147
1148 if(!tn) {
1149 free_leaf_info(li);
1150 tnode_free((struct tnode *) l);
1151 *err = -ENOMEM;
1152 goto err;
1153 }
1154
1009 NODE_SET_PARENT(tn, tp); 1155 NODE_SET_PARENT(tn, tp);
1010 1156
1011 missbit=tkey_extract_bits(key, newpos, 1); 1157 missbit=tkey_extract_bits(key, newpos, 1);
@@ -1027,7 +1173,9 @@ fib_insert_node(struct trie *t, u32 key, int plen)
1027 } 1173 }
1028 /* Rebalance the trie */ 1174 /* Rebalance the trie */
1029 t->trie = trie_rebalance(t, tp); 1175 t->trie = trie_rebalance(t, tp);
1030done:; 1176done:
1177 t->revision++;
1178err:;
1031 return fa_head; 1179 return fa_head;
1032} 1180}
1033 1181
@@ -1156,8 +1304,12 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1156 * Insert new entry to the list. 1304 * Insert new entry to the list.
1157 */ 1305 */
1158 1306
1159 if(!fa_head) 1307 if(!fa_head) {
1160 fa_head = fib_insert_node(t, key, plen); 1308 fa_head = fib_insert_node(t, &err, key, plen);
1309 err = 0;
1310 if(err)
1311 goto out_free_new_fa;
1312 }
1161 1313
1162 write_lock_bh(&fib_lock); 1314 write_lock_bh(&fib_lock);
1163 1315
@@ -1170,6 +1322,9 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1170 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); 1322 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1171succeeded: 1323succeeded:
1172 return 0; 1324 return 0;
1325
1326out_free_new_fa:
1327 kmem_cache_free(fn_alias_kmem, new_fa);
1173out: 1328out:
1174 fib_release_info(fi); 1329 fib_release_info(fi);
1175err:; 1330err:;
@@ -2279,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2279 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2434 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2280 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2435 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2281 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2436 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2437 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2282#ifdef CLEAR_STATS 2438#ifdef CLEAR_STATS
2283 memset(&(t->stats), 0, sizeof(t->stats)); 2439 memset(&(t->stats), 0, sizeof(t->stats));
2284#endif 2440#endif