diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
| -rw-r--r-- | net/ipv4/fib_trie.c | 202 |
1 files changed, 168 insertions, 34 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b56e88edf1b3..4be234c7d8c3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
| @@ -43,7 +43,7 @@ | |||
| 43 | * 2 of the License, or (at your option) any later version. | 43 | * 2 of the License, or (at your option) any later version. |
| 44 | */ | 44 | */ |
| 45 | 45 | ||
| 46 | #define VERSION "0.324" | 46 | #define VERSION "0.325" |
| 47 | 47 | ||
| 48 | #include <linux/config.h> | 48 | #include <linux/config.h> |
| 49 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
| @@ -136,6 +136,7 @@ struct trie_use_stats { | |||
| 136 | unsigned int semantic_match_passed; | 136 | unsigned int semantic_match_passed; |
| 137 | unsigned int semantic_match_miss; | 137 | unsigned int semantic_match_miss; |
| 138 | unsigned int null_node_hit; | 138 | unsigned int null_node_hit; |
| 139 | unsigned int resize_node_skipped; | ||
| 139 | }; | 140 | }; |
| 140 | #endif | 141 | #endif |
| 141 | 142 | ||
| @@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | |||
| 164 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); | 165 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); |
| 165 | static int tnode_child_length(struct tnode *tn); | 166 | static int tnode_child_length(struct tnode *tn); |
| 166 | static struct node *resize(struct trie *t, struct tnode *tn); | 167 | static struct node *resize(struct trie *t, struct tnode *tn); |
| 167 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 168 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err); |
| 168 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 169 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err); |
| 169 | static void tnode_free(struct tnode *tn); | 170 | static void tnode_free(struct tnode *tn); |
| 170 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); | 171 | static void trie_dump_seq(struct seq_file *seq, struct trie *t); |
| 171 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); | 172 | extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); |
| @@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li) | |||
| 358 | kfree(li); | 359 | kfree(li); |
| 359 | } | 360 | } |
| 360 | 361 | ||
| 362 | static struct tnode *tnode_alloc(unsigned int size) | ||
| 363 | { | ||
| 364 | if (size <= PAGE_SIZE) { | ||
| 365 | return kmalloc(size, GFP_KERNEL); | ||
| 366 | } else { | ||
| 367 | return (struct tnode *) | ||
| 368 | __get_free_pages(GFP_KERNEL, get_order(size)); | ||
| 369 | } | ||
| 370 | } | ||
| 371 | |||
| 372 | static void __tnode_free(struct tnode *tn) | ||
| 373 | { | ||
| 374 | unsigned int size = sizeof(struct tnode) + | ||
| 375 | (1<<tn->bits) * sizeof(struct node *); | ||
| 376 | |||
| 377 | if (size <= PAGE_SIZE) | ||
| 378 | kfree(tn); | ||
| 379 | else | ||
| 380 | free_pages((unsigned long)tn, get_order(size)); | ||
| 381 | } | ||
| 382 | |||
| 361 | static struct tnode* tnode_new(t_key key, int pos, int bits) | 383 | static struct tnode* tnode_new(t_key key, int pos, int bits) |
| 362 | { | 384 | { |
| 363 | int nchildren = 1<<bits; | 385 | int nchildren = 1<<bits; |
| 364 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); | 386 | int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); |
| 365 | struct tnode *tn = kmalloc(sz, GFP_KERNEL); | 387 | struct tnode *tn = tnode_alloc(sz); |
| 366 | 388 | ||
| 367 | if(tn) { | 389 | if(tn) { |
| 368 | memset(tn, 0, sz); | 390 | memset(tn, 0, sz); |
| @@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn) | |||
| 390 | printk("FL %p \n", tn); | 412 | printk("FL %p \n", tn); |
| 391 | } | 413 | } |
| 392 | else if(IS_TNODE(tn)) { | 414 | else if(IS_TNODE(tn)) { |
| 393 | kfree(tn); | 415 | __tnode_free(tn); |
| 394 | if(trie_debug > 0 ) | 416 | if(trie_debug > 0 ) |
| 395 | printk("FT %p \n", tn); | 417 | printk("FT %p \n", tn); |
| 396 | } | 418 | } |
| @@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w | |||
| 460 | static struct node *resize(struct trie *t, struct tnode *tn) | 482 | static struct node *resize(struct trie *t, struct tnode *tn) |
| 461 | { | 483 | { |
| 462 | int i; | 484 | int i; |
| 485 | int err = 0; | ||
| 463 | 486 | ||
| 464 | if (!tn) | 487 | if (!tn) |
| 465 | return NULL; | 488 | return NULL; |
| @@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 556 | */ | 579 | */ |
| 557 | 580 | ||
| 558 | check_tnode(tn); | 581 | check_tnode(tn); |
| 559 | 582 | ||
| 583 | err = 0; | ||
| 560 | while ((tn->full_children > 0 && | 584 | while ((tn->full_children > 0 && |
| 561 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= | 585 | 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= |
| 562 | inflate_threshold * tnode_child_length(tn))) { | 586 | inflate_threshold * tnode_child_length(tn))) { |
| 563 | 587 | ||
| 564 | tn = inflate(t, tn); | 588 | tn = inflate(t, tn, &err); |
| 589 | |||
| 590 | if(err) { | ||
| 591 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
| 592 | t->stats.resize_node_skipped++; | ||
| 593 | #endif | ||
| 594 | break; | ||
| 595 | } | ||
| 565 | } | 596 | } |
| 566 | 597 | ||
| 567 | check_tnode(tn); | 598 | check_tnode(tn); |
| @@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 570 | * Halve as long as the number of empty children in this | 601 | * Halve as long as the number of empty children in this |
| 571 | * node is above threshold. | 602 | * node is above threshold. |
| 572 | */ | 603 | */ |
| 604 | |||
| 605 | err = 0; | ||
| 573 | while (tn->bits > 1 && | 606 | while (tn->bits > 1 && |
| 574 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 607 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
| 575 | halve_threshold * tnode_child_length(tn)) | 608 | halve_threshold * tnode_child_length(tn)) { |
| 609 | |||
| 610 | tn = halve(t, tn, &err); | ||
| 611 | |||
| 612 | if(err) { | ||
| 613 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
| 614 | t->stats.resize_node_skipped++; | ||
| 615 | #endif | ||
| 616 | break; | ||
| 617 | } | ||
| 618 | } | ||
| 576 | 619 | ||
| 577 | tn = halve(t, tn); | ||
| 578 | 620 | ||
| 579 | /* Only one child remains */ | 621 | /* Only one child remains */ |
| 580 | 622 | ||
| @@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
| 599 | return (struct node *) tn; | 641 | return (struct node *) tn; |
| 600 | } | 642 | } |
| 601 | 643 | ||
| 602 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 644 | static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err) |
| 603 | { | 645 | { |
| 604 | struct tnode *inode; | 646 | struct tnode *inode; |
| 605 | struct tnode *oldtnode = tn; | 647 | struct tnode *oldtnode = tn; |
| @@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 611 | 653 | ||
| 612 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); | 654 | tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); |
| 613 | 655 | ||
| 614 | if (!tn) | 656 | if (!tn) { |
| 615 | trie_bug("tnode_new failed"); | 657 | *err = -ENOMEM; |
| 658 | return oldtnode; | ||
| 659 | } | ||
| 660 | |||
| 661 | /* | ||
| 662 | * Preallocate and store tnodes before the actual work so we | ||
| 663 | * don't get into an inconsistent state if memory allocation | ||
| 664 | * fails. In case of failure we return the oldnode and inflate | ||
| 665 | * of tnode is ignored. | ||
| 666 | */ | ||
| 667 | |||
| 668 | for(i = 0; i < olen; i++) { | ||
| 669 | struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i); | ||
| 670 | |||
| 671 | if (inode && | ||
| 672 | IS_TNODE(inode) && | ||
| 673 | inode->pos == oldtnode->pos + oldtnode->bits && | ||
| 674 | inode->bits > 1) { | ||
| 675 | struct tnode *left, *right; | ||
| 676 | |||
| 677 | t_key m = TKEY_GET_MASK(inode->pos, 1); | ||
| 678 | |||
| 679 | left = tnode_new(inode->key&(~m), inode->pos + 1, | ||
| 680 | inode->bits - 1); | ||
| 681 | |||
| 682 | if(!left) { | ||
| 683 | *err = -ENOMEM; | ||
| 684 | break; | ||
| 685 | } | ||
| 686 | |||
| 687 | right = tnode_new(inode->key|m, inode->pos + 1, | ||
| 688 | inode->bits - 1); | ||
| 689 | |||
| 690 | if(!right) { | ||
| 691 | *err = -ENOMEM; | ||
| 692 | break; | ||
| 693 | } | ||
| 694 | |||
| 695 | put_child(t, tn, 2*i, (struct node *) left); | ||
| 696 | put_child(t, tn, 2*i+1, (struct node *) right); | ||
| 697 | } | ||
| 698 | } | ||
| 699 | |||
| 700 | if(*err) { | ||
| 701 | int size = tnode_child_length(tn); | ||
| 702 | int j; | ||
| 703 | |||
| 704 | for(j = 0; j < size; j++) | ||
| 705 | if( tn->child[j]) | ||
| 706 | tnode_free((struct tnode *)tn->child[j]); | ||
| 707 | |||
| 708 | tnode_free(tn); | ||
| 709 | |||
| 710 | *err = -ENOMEM; | ||
| 711 | return oldtnode; | ||
| 712 | } | ||
| 616 | 713 | ||
| 617 | for(i = 0; i < olen; i++) { | 714 | for(i = 0; i < olen; i++) { |
| 618 | struct node *node = tnode_get_child(oldtnode, i); | 715 | struct node *node = tnode_get_child(oldtnode, i); |
| @@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 625 | 722 | ||
| 626 | if(IS_LEAF(node) || ((struct tnode *) node)->pos > | 723 | if(IS_LEAF(node) || ((struct tnode *) node)->pos > |
| 627 | tn->pos + tn->bits - 1) { | 724 | tn->pos + tn->bits - 1) { |
| 628 | if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, | 725 | if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits, |
| 629 | 1) == 0) | 726 | 1) == 0) |
| 630 | put_child(t, tn, 2*i, node); | 727 | put_child(t, tn, 2*i, node); |
| 631 | else | 728 | else |
| @@ -665,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 665 | * the position (inode->pos) | 762 | * the position (inode->pos) |
| 666 | */ | 763 | */ |
| 667 | 764 | ||
| 668 | t_key m = TKEY_GET_MASK(inode->pos, 1); | ||
| 669 | |||
| 670 | /* Use the old key, but set the new significant | 765 | /* Use the old key, but set the new significant |
| 671 | * bit to zero. | 766 | * bit to zero. |
| 672 | */ | 767 | */ |
| 673 | left = tnode_new(inode->key&(~m), inode->pos + 1, | ||
| 674 | inode->bits - 1); | ||
| 675 | 768 | ||
| 676 | if(!left) | 769 | left = (struct tnode *) tnode_get_child(tn, 2*i); |
| 677 | trie_bug("tnode_new failed"); | 770 | put_child(t, tn, 2*i, NULL); |
| 678 | 771 | ||
| 679 | 772 | if(!left) | |
| 680 | /* Use the old key, but set the new significant | 773 | BUG(); |
| 681 | * bit to one. | 774 | |
| 682 | */ | 775 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); |
| 683 | right = tnode_new(inode->key|m, inode->pos + 1, | 776 | put_child(t, tn, 2*i+1, NULL); |
| 684 | inode->bits - 1); | 777 | |
| 778 | if(!right) | ||
| 779 | BUG(); | ||
| 685 | 780 | ||
| 686 | if(!right) | ||
| 687 | trie_bug("tnode_new failed"); | ||
| 688 | |||
| 689 | size = tnode_child_length(left); | 781 | size = tnode_child_length(left); |
| 690 | for(j = 0; j < size; j++) { | 782 | for(j = 0; j < size; j++) { |
| 691 | put_child(t, left, j, inode->child[j]); | 783 | put_child(t, left, j, inode->child[j]); |
| @@ -701,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 701 | return tn; | 793 | return tn; |
| 702 | } | 794 | } |
| 703 | 795 | ||
| 704 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 796 | static struct tnode *halve(struct trie *t, struct tnode *tn, int *err) |
| 705 | { | 797 | { |
| 706 | struct tnode *oldtnode = tn; | 798 | struct tnode *oldtnode = tn; |
| 707 | struct node *left, *right; | 799 | struct node *left, *right; |
| @@ -712,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
| 712 | 804 | ||
| 713 | tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); | 805 | tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); |
| 714 | 806 | ||
| 715 | if(!tn) | 807 | if (!tn) { |
| 716 | trie_bug("tnode_new failed"); | 808 | *err = -ENOMEM; |
| 809 | return oldtnode; | ||
| 810 | } | ||
| 811 | |||
| 812 | /* | ||
| 813 | * Preallocate and store tnodes before the actual work so we | ||
| 814 | * don't get into an inconsistent state if memory allocation | ||
| 815 | * fails. In case of failure we return the oldnode and halve | ||
| 816 | * of tnode is ignored. | ||
| 817 | */ | ||
| 818 | |||
| 819 | for(i = 0; i < olen; i += 2) { | ||
| 820 | left = tnode_get_child(oldtnode, i); | ||
| 821 | right = tnode_get_child(oldtnode, i+1); | ||
| 822 | |||
| 823 | /* Two nonempty children */ | ||
| 824 | if( left && right) { | ||
| 825 | struct tnode *newBinNode = | ||
| 826 | tnode_new(left->key, tn->pos + tn->bits, 1); | ||
| 827 | |||
| 828 | if(!newBinNode) { | ||
| 829 | *err = -ENOMEM; | ||
| 830 | break; | ||
| 831 | } | ||
| 832 | put_child(t, tn, i/2, (struct node *)newBinNode); | ||
| 833 | } | ||
| 834 | } | ||
| 835 | |||
| 836 | if(*err) { | ||
| 837 | int size = tnode_child_length(tn); | ||
| 838 | int j; | ||
| 839 | |||
| 840 | for(j = 0; j < size; j++) | ||
| 841 | if( tn->child[j]) | ||
| 842 | tnode_free((struct tnode *)tn->child[j]); | ||
| 843 | |||
| 844 | tnode_free(tn); | ||
| 845 | |||
| 846 | *err = -ENOMEM; | ||
| 847 | return oldtnode; | ||
| 848 | } | ||
| 717 | 849 | ||
| 718 | for(i = 0; i < olen; i += 2) { | 850 | for(i = 0; i < olen; i += 2) { |
| 719 | left = tnode_get_child(oldtnode, i); | 851 | left = tnode_get_child(oldtnode, i); |
| @@ -730,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
| 730 | /* Two nonempty children */ | 862 | /* Two nonempty children */ |
| 731 | else { | 863 | else { |
| 732 | struct tnode *newBinNode = | 864 | struct tnode *newBinNode = |
| 733 | tnode_new(left->key, tn->pos + tn->bits, 1); | 865 | (struct tnode *) tnode_get_child(tn, i/2); |
| 866 | put_child(t, tn, i/2, NULL); | ||
| 734 | 867 | ||
| 735 | if(!newBinNode) | 868 | if(!newBinNode) |
| 736 | trie_bug("tnode_new failed"); | 869 | BUG(); |
| 737 | 870 | ||
| 738 | put_child(t, newBinNode, 0, left); | 871 | put_child(t, newBinNode, 0, left); |
| 739 | put_child(t, newBinNode, 1, right); | 872 | put_child(t, newBinNode, 1, right); |
| @@ -2301,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq) | |||
| 2301 | seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); | 2434 | seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); |
| 2302 | seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); | 2435 | seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); |
| 2303 | seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); | 2436 | seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); |
| 2437 | seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped); | ||
| 2304 | #ifdef CLEAR_STATS | 2438 | #ifdef CLEAR_STATS |
| 2305 | memset(&(t->stats), 0, sizeof(t->stats)); | 2439 | memset(&(t->stats), 0, sizeof(t->stats)); |
| 2306 | #endif | 2440 | #endif |
