aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/fib_trie.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r--net/ipv4/fib_trie.c458
1 files changed, 209 insertions, 249 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862bc..58c25ea5a5c1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -12,11 +12,11 @@
12 * 12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet 13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 * 14 *
15 * This work is based on the LPC-trie which is originally descibed in: 15 * This work is based on the LPC-trie which is originally described in:
16 * 16 *
17 * An experimental study of compression methods for dynamic tries 17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 19 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
20 * 20 *
21 * 21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
@@ -72,6 +72,7 @@
72#include <linux/init.h> 72#include <linux/init.h>
73#include <linux/list.h> 73#include <linux/list.h>
74#include <linux/slab.h> 74#include <linux/slab.h>
75#include <linux/prefetch.h>
75#include <net/net_namespace.h> 76#include <net/net_namespace.h>
76#include <net/ip.h> 77#include <net/ip.h>
77#include <net/protocol.h> 78#include <net/protocol.h>
@@ -95,7 +96,7 @@ typedef unsigned int t_key;
95#define IS_TNODE(n) (!(n->parent & T_LEAF)) 96#define IS_TNODE(n) (!(n->parent & T_LEAF))
96#define IS_LEAF(n) (n->parent & T_LEAF) 97#define IS_LEAF(n) (n->parent & T_LEAF)
97 98
98struct node { 99struct rt_trie_node {
99 unsigned long parent; 100 unsigned long parent;
100 t_key key; 101 t_key key;
101}; 102};
@@ -126,7 +127,7 @@ struct tnode {
126 struct work_struct work; 127 struct work_struct work;
127 struct tnode *tnode_free; 128 struct tnode *tnode_free;
128 }; 129 };
129 struct node *child[0]; 130 struct rt_trie_node __rcu *child[0];
130}; 131};
131 132
132#ifdef CONFIG_IP_FIB_TRIE_STATS 133#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,16 +152,16 @@ struct trie_stat {
151}; 152};
152 153
153struct trie { 154struct trie {
154 struct node *trie; 155 struct rt_trie_node __rcu *trie;
155#ifdef CONFIG_IP_FIB_TRIE_STATS 156#ifdef CONFIG_IP_FIB_TRIE_STATS
156 struct trie_use_stats stats; 157 struct trie_use_stats stats;
157#endif 158#endif
158}; 159};
159 160
160static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); 161static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
161static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 162static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
162 int wasfull); 163 int wasfull);
163static struct node *resize(struct trie *t, struct tnode *tn); 164static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
164static struct tnode *inflate(struct trie *t, struct tnode *tn); 165static struct tnode *inflate(struct trie *t, struct tnode *tn);
165static struct tnode *halve(struct trie *t, struct tnode *tn); 166static struct tnode *halve(struct trie *t, struct tnode *tn);
166/* tnodes to free after resize(); protected by RTNL */ 167/* tnodes to free after resize(); protected by RTNL */
@@ -177,43 +178,58 @@ static const int sync_pages = 128;
177static struct kmem_cache *fn_alias_kmem __read_mostly; 178static struct kmem_cache *fn_alias_kmem __read_mostly;
178static struct kmem_cache *trie_leaf_kmem __read_mostly; 179static struct kmem_cache *trie_leaf_kmem __read_mostly;
179 180
180static inline struct tnode *node_parent(struct node *node) 181/*
182 * caller must hold RTNL
183 */
184static inline struct tnode *node_parent(const struct rt_trie_node *node)
181{ 185{
182 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); 186 unsigned long parent;
187
188 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
189
190 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
183} 191}
184 192
185static inline struct tnode *node_parent_rcu(struct node *node) 193/*
194 * caller must hold RCU read lock or RTNL
195 */
196static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
186{ 197{
187 struct tnode *ret = node_parent(node); 198 unsigned long parent;
199
200 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
201 lockdep_rtnl_is_held());
188 202
189 return rcu_dereference_check(ret, 203 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
190 rcu_read_lock_held() ||
191 lockdep_rtnl_is_held());
192} 204}
193 205
194/* Same as rcu_assign_pointer 206/* Same as rcu_assign_pointer
195 * but that macro() assumes that value is a pointer. 207 * but that macro() assumes that value is a pointer.
196 */ 208 */
197static inline void node_set_parent(struct node *node, struct tnode *ptr) 209static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
198{ 210{
199 smp_wmb(); 211 smp_wmb();
200 node->parent = (unsigned long)ptr | NODE_TYPE(node); 212 node->parent = (unsigned long)ptr | NODE_TYPE(node);
201} 213}
202 214
203static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) 215/*
216 * caller must hold RTNL
217 */
218static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
204{ 219{
205 BUG_ON(i >= 1U << tn->bits); 220 BUG_ON(i >= 1U << tn->bits);
206 221
207 return tn->child[i]; 222 return rtnl_dereference(tn->child[i]);
208} 223}
209 224
210static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) 225/*
226 * caller must hold RCU read lock or RTNL
227 */
228static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
211{ 229{
212 struct node *ret = tnode_get_child(tn, i); 230 BUG_ON(i >= 1U << tn->bits);
213 231
214 return rcu_dereference_check(ret, 232 return rcu_dereference_rtnl(tn->child[i]);
215 rcu_read_lock_held() ||
216 lockdep_rtnl_is_held());
217} 233}
218 234
219static inline int tnode_child_length(const struct tnode *tn) 235static inline int tnode_child_length(const struct tnode *tn)
@@ -221,12 +237,12 @@ static inline int tnode_child_length(const struct tnode *tn)
221 return 1 << tn->bits; 237 return 1 << tn->bits;
222} 238}
223 239
224static inline t_key mask_pfx(t_key k, unsigned short l) 240static inline t_key mask_pfx(t_key k, unsigned int l)
225{ 241{
226 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 242 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
227} 243}
228 244
229static inline t_key tkey_extract_bits(t_key a, int offset, int bits) 245static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
230{ 246{
231 if (offset < KEYLENGTH) 247 if (offset < KEYLENGTH)
232 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 248 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
@@ -354,14 +370,9 @@ static inline void free_leaf(struct leaf *l)
354 call_rcu_bh(&l->rcu, __leaf_free_rcu); 370 call_rcu_bh(&l->rcu, __leaf_free_rcu);
355} 371}
356 372
357static void __leaf_info_free_rcu(struct rcu_head *head)
358{
359 kfree(container_of(head, struct leaf_info, rcu));
360}
361
362static inline void free_leaf_info(struct leaf_info *leaf) 373static inline void free_leaf_info(struct leaf_info *leaf)
363{ 374{
364 call_rcu(&leaf->rcu, __leaf_info_free_rcu); 375 kfree_rcu(leaf, rcu);
365} 376}
366 377
367static struct tnode *tnode_alloc(size_t size) 378static struct tnode *tnode_alloc(size_t size)
@@ -369,7 +380,7 @@ static struct tnode *tnode_alloc(size_t size)
369 if (size <= PAGE_SIZE) 380 if (size <= PAGE_SIZE)
370 return kzalloc(size, GFP_KERNEL); 381 return kzalloc(size, GFP_KERNEL);
371 else 382 else
372 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); 383 return vzalloc(size);
373} 384}
374 385
375static void __tnode_vfree(struct work_struct *arg) 386static void __tnode_vfree(struct work_struct *arg)
@@ -382,7 +393,7 @@ static void __tnode_free_rcu(struct rcu_head *head)
382{ 393{
383 struct tnode *tn = container_of(head, struct tnode, rcu); 394 struct tnode *tn = container_of(head, struct tnode, rcu);
384 size_t size = sizeof(struct tnode) + 395 size_t size = sizeof(struct tnode) +
385 (sizeof(struct node *) << tn->bits); 396 (sizeof(struct rt_trie_node *) << tn->bits);
386 397
387 if (size <= PAGE_SIZE) 398 if (size <= PAGE_SIZE)
388 kfree(tn); 399 kfree(tn);
@@ -406,7 +417,7 @@ static void tnode_free_safe(struct tnode *tn)
406 tn->tnode_free = tnode_free_head; 417 tn->tnode_free = tnode_free_head;
407 tnode_free_head = tn; 418 tnode_free_head = tn;
408 tnode_free_size += sizeof(struct tnode) + 419 tnode_free_size += sizeof(struct tnode) +
409 (sizeof(struct node *) << tn->bits); 420 (sizeof(struct rt_trie_node *) << tn->bits);
410} 421}
411 422
412static void tnode_free_flush(void) 423static void tnode_free_flush(void)
@@ -447,7 +458,7 @@ static struct leaf_info *leaf_info_new(int plen)
447 458
448static struct tnode *tnode_new(t_key key, int pos, int bits) 459static struct tnode *tnode_new(t_key key, int pos, int bits)
449{ 460{
450 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); 461 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
451 struct tnode *tn = tnode_alloc(sz); 462 struct tnode *tn = tnode_alloc(sz);
452 463
453 if (tn) { 464 if (tn) {
@@ -459,8 +470,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
459 tn->empty_children = 1<<bits; 470 tn->empty_children = 1<<bits;
460 } 471 }
461 472
462 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 473 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
463 (unsigned long) (sizeof(struct node) << bits)); 474 sizeof(struct rt_trie_node) << bits);
464 return tn; 475 return tn;
465} 476}
466 477
@@ -469,7 +480,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
469 * and no bits are skipped. See discussion in dyntree paper p. 6 480 * and no bits are skipped. See discussion in dyntree paper p. 6
470 */ 481 */
471 482
472static inline int tnode_full(const struct tnode *tn, const struct node *n) 483static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
473{ 484{
474 if (n == NULL || IS_LEAF(n)) 485 if (n == NULL || IS_LEAF(n))
475 return 0; 486 return 0;
@@ -478,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n)
478} 489}
479 490
480static inline void put_child(struct trie *t, struct tnode *tn, int i, 491static inline void put_child(struct trie *t, struct tnode *tn, int i,
481 struct node *n) 492 struct rt_trie_node *n)
482{ 493{
483 tnode_put_child_reorg(tn, i, n, -1); 494 tnode_put_child_reorg(tn, i, n, -1);
484} 495}
@@ -488,10 +499,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
488 * Update the value of full_children and empty_children. 499 * Update the value of full_children and empty_children.
489 */ 500 */
490 501
491static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, 502static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
492 int wasfull) 503 int wasfull)
493{ 504{
494 struct node *chi = tn->child[i]; 505 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
495 int isfull; 506 int isfull;
496 507
497 BUG_ON(i >= 1<<tn->bits); 508 BUG_ON(i >= 1<<tn->bits);
@@ -519,7 +530,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
519} 530}
520 531
521#define MAX_WORK 10 532#define MAX_WORK 10
522static struct node *resize(struct trie *t, struct tnode *tn) 533static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
523{ 534{
524 int i; 535 int i;
525 struct tnode *old_tn; 536 struct tnode *old_tn;
@@ -609,11 +620,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
609 620
610 /* Keep root node larger */ 621 /* Keep root node larger */
611 622
612 if (!node_parent((struct node*) tn)) { 623 if (!node_parent((struct rt_trie_node *)tn)) {
613 inflate_threshold_use = inflate_threshold_root; 624 inflate_threshold_use = inflate_threshold_root;
614 halve_threshold_use = halve_threshold_root; 625 halve_threshold_use = halve_threshold_root;
615 } 626 } else {
616 else {
617 inflate_threshold_use = inflate_threshold; 627 inflate_threshold_use = inflate_threshold;
618 halve_threshold_use = halve_threshold; 628 halve_threshold_use = halve_threshold;
619 } 629 }
@@ -639,8 +649,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
639 check_tnode(tn); 649 check_tnode(tn);
640 650
641 /* Return if at least one inflate is run */ 651 /* Return if at least one inflate is run */
642 if( max_work != MAX_WORK) 652 if (max_work != MAX_WORK)
643 return (struct node *) tn; 653 return (struct rt_trie_node *) tn;
644 654
645 /* 655 /*
646 * Halve as long as the number of empty children in this 656 * Halve as long as the number of empty children in this
@@ -668,9 +678,9 @@ static struct node *resize(struct trie *t, struct tnode *tn)
668 if (tn->empty_children == tnode_child_length(tn) - 1) { 678 if (tn->empty_children == tnode_child_length(tn) - 1) {
669one_child: 679one_child:
670 for (i = 0; i < tnode_child_length(tn); i++) { 680 for (i = 0; i < tnode_child_length(tn); i++) {
671 struct node *n; 681 struct rt_trie_node *n;
672 682
673 n = tn->child[i]; 683 n = rtnl_dereference(tn->child[i]);
674 if (!n) 684 if (!n)
675 continue; 685 continue;
676 686
@@ -681,7 +691,21 @@ one_child:
681 return n; 691 return n;
682 } 692 }
683 } 693 }
684 return (struct node *) tn; 694 return (struct rt_trie_node *) tn;
695}
696
697
698static void tnode_clean_free(struct tnode *tn)
699{
700 int i;
701 struct tnode *tofree;
702
703 for (i = 0; i < tnode_child_length(tn); i++) {
704 tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
705 if (tofree)
706 tnode_free(tofree);
707 }
708 tnode_free(tn);
685} 709}
686 710
687static struct tnode *inflate(struct trie *t, struct tnode *tn) 711static struct tnode *inflate(struct trie *t, struct tnode *tn)
@@ -728,14 +752,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
728 goto nomem; 752 goto nomem;
729 } 753 }
730 754
731 put_child(t, tn, 2*i, (struct node *) left); 755 put_child(t, tn, 2*i, (struct rt_trie_node *) left);
732 put_child(t, tn, 2*i+1, (struct node *) right); 756 put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
733 } 757 }
734 } 758 }
735 759
736 for (i = 0; i < olen; i++) { 760 for (i = 0; i < olen; i++) {
737 struct tnode *inode; 761 struct tnode *inode;
738 struct node *node = tnode_get_child(oldtnode, i); 762 struct rt_trie_node *node = tnode_get_child(oldtnode, i);
739 struct tnode *left, *right; 763 struct tnode *left, *right;
740 int size, j; 764 int size, j;
741 765
@@ -760,8 +784,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
760 inode = (struct tnode *) node; 784 inode = (struct tnode *) node;
761 785
762 if (inode->bits == 1) { 786 if (inode->bits == 1) {
763 put_child(t, tn, 2*i, inode->child[0]); 787 put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
764 put_child(t, tn, 2*i+1, inode->child[1]); 788 put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
765 789
766 tnode_free_safe(inode); 790 tnode_free_safe(inode);
767 continue; 791 continue;
@@ -802,8 +826,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
802 826
803 size = tnode_child_length(left); 827 size = tnode_child_length(left);
804 for (j = 0; j < size; j++) { 828 for (j = 0; j < size; j++) {
805 put_child(t, left, j, inode->child[j]); 829 put_child(t, left, j, rtnl_dereference(inode->child[j]));
806 put_child(t, right, j, inode->child[j + size]); 830 put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
807 } 831 }
808 put_child(t, tn, 2*i, resize(t, left)); 832 put_child(t, tn, 2*i, resize(t, left));
809 put_child(t, tn, 2*i+1, resize(t, right)); 833 put_child(t, tn, 2*i+1, resize(t, right));
@@ -813,24 +837,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
813 tnode_free_safe(oldtnode); 837 tnode_free_safe(oldtnode);
814 return tn; 838 return tn;
815nomem: 839nomem:
816 { 840 tnode_clean_free(tn);
817 int size = tnode_child_length(tn); 841 return ERR_PTR(-ENOMEM);
818 int j;
819
820 for (j = 0; j < size; j++)
821 if (tn->child[j])
822 tnode_free((struct tnode *)tn->child[j]);
823
824 tnode_free(tn);
825
826 return ERR_PTR(-ENOMEM);
827 }
828} 842}
829 843
830static struct tnode *halve(struct trie *t, struct tnode *tn) 844static struct tnode *halve(struct trie *t, struct tnode *tn)
831{ 845{
832 struct tnode *oldtnode = tn; 846 struct tnode *oldtnode = tn;
833 struct node *left, *right; 847 struct rt_trie_node *left, *right;
834 int i; 848 int i;
835 int olen = tnode_child_length(tn); 849 int olen = tnode_child_length(tn);
836 850
@@ -861,7 +875,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
861 if (!newn) 875 if (!newn)
862 goto nomem; 876 goto nomem;
863 877
864 put_child(t, tn, i/2, (struct node *)newn); 878 put_child(t, tn, i/2, (struct rt_trie_node *)newn);
865 } 879 }
866 880
867 } 881 }
@@ -895,18 +909,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
895 tnode_free_safe(oldtnode); 909 tnode_free_safe(oldtnode);
896 return tn; 910 return tn;
897nomem: 911nomem:
898 { 912 tnode_clean_free(tn);
899 int size = tnode_child_length(tn); 913 return ERR_PTR(-ENOMEM);
900 int j;
901
902 for (j = 0; j < size; j++)
903 if (tn->child[j])
904 tnode_free((struct tnode *)tn->child[j]);
905
906 tnode_free(tn);
907
908 return ERR_PTR(-ENOMEM);
909 }
910} 914}
911 915
912/* readside must use rcu_read_lock currently dump routines 916/* readside must use rcu_read_lock currently dump routines
@@ -963,12 +967,10 @@ fib_find_node(struct trie *t, u32 key)
963{ 967{
964 int pos; 968 int pos;
965 struct tnode *tn; 969 struct tnode *tn;
966 struct node *n; 970 struct rt_trie_node *n;
967 971
968 pos = 0; 972 pos = 0;
969 n = rcu_dereference_check(t->trie, 973 n = rcu_dereference_rtnl(t->trie);
970 rcu_read_lock_held() ||
971 lockdep_rtnl_is_held());
972 974
973 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 975 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
974 tn = (struct tnode *) n; 976 tn = (struct tnode *) n;
@@ -1000,17 +1002,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1000 1002
1001 key = tn->key; 1003 key = tn->key;
1002 1004
1003 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { 1005 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
1004 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1006 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1005 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 1007 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
1006 tn = (struct tnode *) resize(t, (struct tnode *)tn); 1008 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1007 1009
1008 tnode_put_child_reorg((struct tnode *)tp, cindex, 1010 tnode_put_child_reorg((struct tnode *)tp, cindex,
1009 (struct node *)tn, wasfull); 1011 (struct rt_trie_node *)tn, wasfull);
1010 1012
1011 tp = node_parent((struct node *) tn); 1013 tp = node_parent((struct rt_trie_node *) tn);
1012 if (!tp) 1014 if (!tp)
1013 rcu_assign_pointer(t->trie, (struct node *)tn); 1015 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1014 1016
1015 tnode_free_flush(); 1017 tnode_free_flush();
1016 if (!tp) 1018 if (!tp)
@@ -1022,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1022 if (IS_TNODE(tn)) 1024 if (IS_TNODE(tn))
1023 tn = (struct tnode *)resize(t, (struct tnode *)tn); 1025 tn = (struct tnode *)resize(t, (struct tnode *)tn);
1024 1026
1025 rcu_assign_pointer(t->trie, (struct node *)tn); 1027 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1026 tnode_free_flush(); 1028 tnode_free_flush();
1027} 1029}
1028 1030
@@ -1032,7 +1034,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1032{ 1034{
1033 int pos, newpos; 1035 int pos, newpos;
1034 struct tnode *tp = NULL, *tn = NULL; 1036 struct tnode *tp = NULL, *tn = NULL;
1035 struct node *n; 1037 struct rt_trie_node *n;
1036 struct leaf *l; 1038 struct leaf *l;
1037 int missbit; 1039 int missbit;
1038 struct list_head *fa_head = NULL; 1040 struct list_head *fa_head = NULL;
@@ -1040,7 +1042,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1040 t_key cindex; 1042 t_key cindex;
1041 1043
1042 pos = 0; 1044 pos = 0;
1043 n = t->trie; 1045 n = rtnl_dereference(t->trie);
1044 1046
1045 /* If we point to NULL, stop. Either the tree is empty and we should 1047 /* If we point to NULL, stop. Either the tree is empty and we should
1046 * just put a new leaf in if, or we have reached an empty child slot, 1048 * just put a new leaf in if, or we have reached an empty child slot,
@@ -1118,10 +1120,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1118 if (t->trie && n == NULL) { 1120 if (t->trie && n == NULL) {
1119 /* Case 2: n is NULL, and will just insert a new leaf */ 1121 /* Case 2: n is NULL, and will just insert a new leaf */
1120 1122
1121 node_set_parent((struct node *)l, tp); 1123 node_set_parent((struct rt_trie_node *)l, tp);
1122 1124
1123 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1125 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1124 put_child(t, (struct tnode *)tp, cindex, (struct node *)l); 1126 put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l);
1125 } else { 1127 } else {
1126 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ 1128 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1127 /* 1129 /*
@@ -1148,18 +1150,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1148 return NULL; 1150 return NULL;
1149 } 1151 }
1150 1152
1151 node_set_parent((struct node *)tn, tp); 1153 node_set_parent((struct rt_trie_node *)tn, tp);
1152 1154
1153 missbit = tkey_extract_bits(key, newpos, 1); 1155 missbit = tkey_extract_bits(key, newpos, 1);
1154 put_child(t, tn, missbit, (struct node *)l); 1156 put_child(t, tn, missbit, (struct rt_trie_node *)l);
1155 put_child(t, tn, 1-missbit, n); 1157 put_child(t, tn, 1-missbit, n);
1156 1158
1157 if (tp) { 1159 if (tp) {
1158 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 1160 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1159 put_child(t, (struct tnode *)tp, cindex, 1161 put_child(t, (struct tnode *)tp, cindex,
1160 (struct node *)tn); 1162 (struct rt_trie_node *)tn);
1161 } else { 1163 } else {
1162 rcu_assign_pointer(t->trie, (struct node *)tn); 1164 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1163 tp = tn; 1165 tp = tn;
1164 } 1166 }
1165 } 1167 }
@@ -1252,7 +1254,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1252 if (fa->fa_info->fib_priority != fi->fib_priority) 1254 if (fa->fa_info->fib_priority != fi->fib_priority)
1253 break; 1255 break;
1254 if (fa->fa_type == cfg->fc_type && 1256 if (fa->fa_type == cfg->fc_type &&
1255 fa->fa_scope == cfg->fc_scope &&
1256 fa->fa_info == fi) { 1257 fa->fa_info == fi) {
1257 fa_match = fa; 1258 fa_match = fa;
1258 break; 1259 break;
@@ -1278,7 +1279,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1278 new_fa->fa_tos = fa->fa_tos; 1279 new_fa->fa_tos = fa->fa_tos;
1279 new_fa->fa_info = fi; 1280 new_fa->fa_info = fi;
1280 new_fa->fa_type = cfg->fc_type; 1281 new_fa->fa_type = cfg->fc_type;
1281 new_fa->fa_scope = cfg->fc_scope;
1282 state = fa->fa_state; 1282 state = fa->fa_state;
1283 new_fa->fa_state = state & ~FA_S_ACCESSED; 1283 new_fa->fa_state = state & ~FA_S_ACCESSED;
1284 1284
@@ -1315,7 +1315,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1315 new_fa->fa_info = fi; 1315 new_fa->fa_info = fi;
1316 new_fa->fa_tos = tos; 1316 new_fa->fa_tos = tos;
1317 new_fa->fa_type = cfg->fc_type; 1317 new_fa->fa_type = cfg->fc_type;
1318 new_fa->fa_scope = cfg->fc_scope;
1319 new_fa->fa_state = 0; 1318 new_fa->fa_state = 0;
1320 /* 1319 /*
1321 * Insert new entry to the list. 1320 * Insert new entry to the list.
@@ -1329,6 +1328,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
1329 } 1328 }
1330 } 1329 }
1331 1330
1331 if (!plen)
1332 tb->tb_num_default++;
1333
1332 list_add_tail_rcu(&new_fa->fa_list, 1334 list_add_tail_rcu(&new_fa->fa_list,
1333 (fa ? &fa->fa_list : fa_head)); 1335 (fa ? &fa->fa_list : fa_head));
1334 1336
@@ -1347,52 +1349,86 @@ err:
1347} 1349}
1348 1350
1349/* should be called with rcu_read_lock */ 1351/* should be called with rcu_read_lock */
1350static int check_leaf(struct trie *t, struct leaf *l, 1352static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
1351 t_key key, const struct flowi *flp, 1353 t_key key, const struct flowi4 *flp,
1352 struct fib_result *res) 1354 struct fib_result *res, int fib_flags)
1353{ 1355{
1354 struct leaf_info *li; 1356 struct leaf_info *li;
1355 struct hlist_head *hhead = &l->list; 1357 struct hlist_head *hhead = &l->list;
1356 struct hlist_node *node; 1358 struct hlist_node *node;
1357 1359
1358 hlist_for_each_entry_rcu(li, node, hhead, hlist) { 1360 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
1359 int err; 1361 struct fib_alias *fa;
1360 int plen = li->plen; 1362 int plen = li->plen;
1361 __be32 mask = inet_make_mask(plen); 1363 __be32 mask = inet_make_mask(plen);
1362 1364
1363 if (l->key != (key & ntohl(mask))) 1365 if (l->key != (key & ntohl(mask)))
1364 continue; 1366 continue;
1365 1367
1366 err = fib_semantic_match(&li->falh, flp, res, plen); 1368 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1369 struct fib_info *fi = fa->fa_info;
1370 int nhsel, err;
1367 1371
1372 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
1373 continue;
1374 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1375 continue;
1376 fib_alias_accessed(fa);
1377 err = fib_props[fa->fa_type].error;
1378 if (err) {
1368#ifdef CONFIG_IP_FIB_TRIE_STATS 1379#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 if (err <= 0) 1380 t->stats.semantic_match_passed++;
1370 t->stats.semantic_match_passed++; 1381#endif
1371 else 1382 return err;
1372 t->stats.semantic_match_miss++; 1383 }
1384 if (fi->fib_flags & RTNH_F_DEAD)
1385 continue;
1386 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1387 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1388
1389 if (nh->nh_flags & RTNH_F_DEAD)
1390 continue;
1391 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
1392 continue;
1393
1394#ifdef CONFIG_IP_FIB_TRIE_STATS
1395 t->stats.semantic_match_passed++;
1396#endif
1397 res->prefixlen = plen;
1398 res->nh_sel = nhsel;
1399 res->type = fa->fa_type;
1400 res->scope = fa->fa_info->fib_scope;
1401 res->fi = fi;
1402 res->table = tb;
1403 res->fa_head = &li->falh;
1404 if (!(fib_flags & FIB_LOOKUP_NOREF))
1405 atomic_inc(&res->fi->fib_clntref);
1406 return 0;
1407 }
1408 }
1409
1410#ifdef CONFIG_IP_FIB_TRIE_STATS
1411 t->stats.semantic_match_miss++;
1373#endif 1412#endif
1374 if (err <= 0)
1375 return err;
1376 } 1413 }
1377 1414
1378 return 1; 1415 return 1;
1379} 1416}
1380 1417
1381int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, 1418int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1382 struct fib_result *res) 1419 struct fib_result *res, int fib_flags)
1383{ 1420{
1384 struct trie *t = (struct trie *) tb->tb_data; 1421 struct trie *t = (struct trie *) tb->tb_data;
1385 int ret; 1422 int ret;
1386 struct node *n; 1423 struct rt_trie_node *n;
1387 struct tnode *pn; 1424 struct tnode *pn;
1388 int pos, bits; 1425 unsigned int pos, bits;
1389 t_key key = ntohl(flp->fl4_dst); 1426 t_key key = ntohl(flp->daddr);
1390 int chopped_off; 1427 unsigned int chopped_off;
1391 t_key cindex = 0; 1428 t_key cindex = 0;
1392 int current_prefix_length = KEYLENGTH; 1429 unsigned int current_prefix_length = KEYLENGTH;
1393 struct tnode *cn; 1430 struct tnode *cn;
1394 t_key node_prefix, key_prefix, pref_mismatch; 1431 t_key pref_mismatch;
1395 int mp;
1396 1432
1397 rcu_read_lock(); 1433 rcu_read_lock();
1398 1434
@@ -1406,7 +1442,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1406 1442
1407 /* Just a leaf? */ 1443 /* Just a leaf? */
1408 if (IS_LEAF(n)) { 1444 if (IS_LEAF(n)) {
1409 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1445 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1410 goto found; 1446 goto found;
1411 } 1447 }
1412 1448
@@ -1431,7 +1467,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1431 } 1467 }
1432 1468
1433 if (IS_LEAF(n)) { 1469 if (IS_LEAF(n)) {
1434 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1470 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
1435 if (ret > 0) 1471 if (ret > 0)
1436 goto backtrace; 1472 goto backtrace;
1437 goto found; 1473 goto found;
@@ -1507,10 +1543,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1507 * matching prefix. 1543 * matching prefix.
1508 */ 1544 */
1509 1545
1510 node_prefix = mask_pfx(cn->key, cn->pos); 1546 pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
1511 key_prefix = mask_pfx(key, cn->pos);
1512 pref_mismatch = key_prefix^node_prefix;
1513 mp = 0;
1514 1547
1515 /* 1548 /*
1516 * In short: If skipped bits in this node do not match 1549 * In short: If skipped bits in this node do not match
@@ -1518,13 +1551,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1518 * state.directly. 1551 * state.directly.
1519 */ 1552 */
1520 if (pref_mismatch) { 1553 if (pref_mismatch) {
1521 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1554 int mp = KEYLENGTH - fls(pref_mismatch);
1522 mp++;
1523 pref_mismatch = pref_mismatch << 1;
1524 }
1525 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1526 1555
1527 if (key_prefix != 0) 1556 if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
1528 goto backtrace; 1557 goto backtrace;
1529 1558
1530 if (current_prefix_length >= cn->pos) 1559 if (current_prefix_length >= cn->pos)
@@ -1556,7 +1585,7 @@ backtrace:
1556 if (chopped_off <= pn->bits) { 1585 if (chopped_off <= pn->bits) {
1557 cindex &= ~(1 << (chopped_off-1)); 1586 cindex &= ~(1 << (chopped_off-1));
1558 } else { 1587 } else {
1559 struct tnode *parent = node_parent_rcu((struct node *) pn); 1588 struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
1560 if (!parent) 1589 if (!parent)
1561 goto failed; 1590 goto failed;
1562 1591
@@ -1583,7 +1612,7 @@ found:
1583 */ 1612 */
1584static void trie_leaf_remove(struct trie *t, struct leaf *l) 1613static void trie_leaf_remove(struct trie *t, struct leaf *l)
1585{ 1614{
1586 struct tnode *tp = node_parent((struct node *) l); 1615 struct tnode *tp = node_parent((struct rt_trie_node *) l);
1587 1616
1588 pr_debug("entering trie_leaf_remove(%p)\n", l); 1617 pr_debug("entering trie_leaf_remove(%p)\n", l);
1589 1618
@@ -1644,7 +1673,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1644 1673
1645 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && 1674 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1646 (cfg->fc_scope == RT_SCOPE_NOWHERE || 1675 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1647 fa->fa_scope == cfg->fc_scope) && 1676 fa->fa_info->fib_scope == cfg->fc_scope) &&
1677 (!cfg->fc_prefsrc ||
1678 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1648 (!cfg->fc_protocol || 1679 (!cfg->fc_protocol ||
1649 fi->fib_protocol == cfg->fc_protocol) && 1680 fi->fib_protocol == cfg->fc_protocol) &&
1650 fib_nh_match(cfg, fi) == 0) { 1681 fib_nh_match(cfg, fi) == 0) {
@@ -1665,6 +1696,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
1665 1696
1666 list_del_rcu(&fa->fa_list); 1697 list_del_rcu(&fa->fa_list);
1667 1698
1699 if (!plen)
1700 tb->tb_num_default--;
1701
1668 if (list_empty(fa_head)) { 1702 if (list_empty(fa_head)) {
1669 hlist_del_rcu(&li->hlist); 1703 hlist_del_rcu(&li->hlist);
1670 free_leaf_info(li); 1704 free_leaf_info(li);
@@ -1721,7 +1755,7 @@ static int trie_flush_leaf(struct leaf *l)
1721 * Scan for the next right leaf starting at node p->child[idx] 1755 * Scan for the next right leaf starting at node p->child[idx]
1722 * Since we have back pointer, no recursion necessary. 1756 * Since we have back pointer, no recursion necessary.
1723 */ 1757 */
1724static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) 1758static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
1725{ 1759{
1726 do { 1760 do {
1727 t_key idx; 1761 t_key idx;
@@ -1737,7 +1771,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1737 continue; 1771 continue;
1738 1772
1739 if (IS_LEAF(c)) { 1773 if (IS_LEAF(c)) {
1740 prefetch(p->child[idx]); 1774 prefetch(rcu_dereference_rtnl(p->child[idx]));
1741 return (struct leaf *) c; 1775 return (struct leaf *) c;
1742 } 1776 }
1743 1777
@@ -1747,17 +1781,15 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1747 } 1781 }
1748 1782
1749 /* Node empty, walk back up to parent */ 1783 /* Node empty, walk back up to parent */
1750 c = (struct node *) p; 1784 c = (struct rt_trie_node *) p;
1751 } while ( (p = node_parent_rcu(c)) != NULL); 1785 } while ((p = node_parent_rcu(c)) != NULL);
1752 1786
1753 return NULL; /* Root of trie */ 1787 return NULL; /* Root of trie */
1754} 1788}
1755 1789
1756static struct leaf *trie_firstleaf(struct trie *t) 1790static struct leaf *trie_firstleaf(struct trie *t)
1757{ 1791{
1758 struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, 1792 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
1759 rcu_read_lock_held() ||
1760 lockdep_rtnl_is_held());
1761 1793
1762 if (!n) 1794 if (!n)
1763 return NULL; 1795 return NULL;
@@ -1770,7 +1802,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
1770 1802
1771static struct leaf *trie_nextleaf(struct leaf *l) 1803static struct leaf *trie_nextleaf(struct leaf *l)
1772{ 1804{
1773 struct node *c = (struct node *) l; 1805 struct rt_trie_node *c = (struct rt_trie_node *) l;
1774 struct tnode *p = node_parent_rcu(c); 1806 struct tnode *p = node_parent_rcu(c);
1775 1807
1776 if (!p) 1808 if (!p)
@@ -1814,77 +1846,9 @@ int fib_table_flush(struct fib_table *tb)
1814 return found; 1846 return found;
1815} 1847}
1816 1848
1817void fib_table_select_default(struct fib_table *tb, 1849void fib_free_table(struct fib_table *tb)
1818 const struct flowi *flp,
1819 struct fib_result *res)
1820{ 1850{
1821 struct trie *t = (struct trie *) tb->tb_data; 1851 kfree(tb);
1822 int order, last_idx;
1823 struct fib_info *fi = NULL;
1824 struct fib_info *last_resort;
1825 struct fib_alias *fa = NULL;
1826 struct list_head *fa_head;
1827 struct leaf *l;
1828
1829 last_idx = -1;
1830 last_resort = NULL;
1831 order = -1;
1832
1833 rcu_read_lock();
1834
1835 l = fib_find_node(t, 0);
1836 if (!l)
1837 goto out;
1838
1839 fa_head = get_fa_head(l, 0);
1840 if (!fa_head)
1841 goto out;
1842
1843 if (list_empty(fa_head))
1844 goto out;
1845
1846 list_for_each_entry_rcu(fa, fa_head, fa_list) {
1847 struct fib_info *next_fi = fa->fa_info;
1848
1849 if (fa->fa_scope != res->scope ||
1850 fa->fa_type != RTN_UNICAST)
1851 continue;
1852
1853 if (next_fi->fib_priority > res->fi->fib_priority)
1854 break;
1855 if (!next_fi->fib_nh[0].nh_gw ||
1856 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1857 continue;
1858 fa->fa_state |= FA_S_ACCESSED;
1859
1860 if (fi == NULL) {
1861 if (next_fi != res->fi)
1862 break;
1863 } else if (!fib_detect_death(fi, order, &last_resort,
1864 &last_idx, tb->tb_default)) {
1865 fib_result_assign(res, fi);
1866 tb->tb_default = order;
1867 goto out;
1868 }
1869 fi = next_fi;
1870 order++;
1871 }
1872 if (order <= 0 || fi == NULL) {
1873 tb->tb_default = -1;
1874 goto out;
1875 }
1876
1877 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1878 tb->tb_default)) {
1879 fib_result_assign(res, fi);
1880 tb->tb_default = order;
1881 goto out;
1882 }
1883 if (last_idx >= 0)
1884 fib_result_assign(res, last_resort);
1885 tb->tb_default = last_idx;
1886out:
1887 rcu_read_unlock();
1888} 1852}
1889 1853
1890static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, 1854static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
@@ -1911,7 +1875,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1911 RTM_NEWROUTE, 1875 RTM_NEWROUTE,
1912 tb->tb_id, 1876 tb->tb_id,
1913 fa->fa_type, 1877 fa->fa_type,
1914 fa->fa_scope,
1915 xkey, 1878 xkey,
1916 plen, 1879 plen,
1917 fa->fa_tos, 1880 fa->fa_tos,
@@ -2001,7 +1964,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2001 return skb->len; 1964 return skb->len;
2002} 1965}
2003 1966
2004void __init fib_hash_init(void) 1967void __init fib_trie_init(void)
2005{ 1968{
2006 fn_alias_kmem = kmem_cache_create("ip_fib_alias", 1969 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2007 sizeof(struct fib_alias), 1970 sizeof(struct fib_alias),
@@ -2014,8 +1977,7 @@ void __init fib_hash_init(void)
2014} 1977}
2015 1978
2016 1979
2017/* Fix more generic FIB names for init later */ 1980struct fib_table *fib_trie_table(u32 id)
2018struct fib_table *fib_hash_table(u32 id)
2019{ 1981{
2020 struct fib_table *tb; 1982 struct fib_table *tb;
2021 struct trie *t; 1983 struct trie *t;
@@ -2027,13 +1989,11 @@ struct fib_table *fib_hash_table(u32 id)
2027 1989
2028 tb->tb_id = id; 1990 tb->tb_id = id;
2029 tb->tb_default = -1; 1991 tb->tb_default = -1;
1992 tb->tb_num_default = 0;
2030 1993
2031 t = (struct trie *) tb->tb_data; 1994 t = (struct trie *) tb->tb_data;
2032 memset(t, 0, sizeof(*t)); 1995 memset(t, 0, sizeof(*t));
2033 1996
2034 if (id == RT_TABLE_LOCAL)
2035 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2036
2037 return tb; 1997 return tb;
2038} 1998}
2039 1999
@@ -2043,14 +2003,14 @@ struct fib_trie_iter {
2043 struct seq_net_private p; 2003 struct seq_net_private p;
2044 struct fib_table *tb; 2004 struct fib_table *tb;
2045 struct tnode *tnode; 2005 struct tnode *tnode;
2046 unsigned index; 2006 unsigned int index;
2047 unsigned depth; 2007 unsigned int depth;
2048}; 2008};
2049 2009
2050static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2010static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
2051{ 2011{
2052 struct tnode *tn = iter->tnode; 2012 struct tnode *tn = iter->tnode;
2053 unsigned cindex = iter->index; 2013 unsigned int cindex = iter->index;
2054 struct tnode *p; 2014 struct tnode *p;
2055 2015
2056 /* A single entry routing table */ 2016 /* A single entry routing table */
@@ -2061,7 +2021,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2061 iter->tnode, iter->index, iter->depth); 2021 iter->tnode, iter->index, iter->depth);
2062rescan: 2022rescan:
2063 while (cindex < (1<<tn->bits)) { 2023 while (cindex < (1<<tn->bits)) {
2064 struct node *n = tnode_get_child_rcu(tn, cindex); 2024 struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
2065 2025
2066 if (n) { 2026 if (n) {
2067 if (IS_LEAF(n)) { 2027 if (IS_LEAF(n)) {
@@ -2080,7 +2040,7 @@ rescan:
2080 } 2040 }
2081 2041
2082 /* Current node exhausted, pop back up */ 2042 /* Current node exhausted, pop back up */
2083 p = node_parent_rcu((struct node *)tn); 2043 p = node_parent_rcu((struct rt_trie_node *)tn);
2084 if (p) { 2044 if (p) {
2085 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 2045 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2086 tn = p; 2046 tn = p;
@@ -2092,10 +2052,10 @@ rescan:
2092 return NULL; 2052 return NULL;
2093} 2053}
2094 2054
2095static struct node *fib_trie_get_first(struct fib_trie_iter *iter, 2055static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
2096 struct trie *t) 2056 struct trie *t)
2097{ 2057{
2098 struct node *n; 2058 struct rt_trie_node *n;
2099 2059
2100 if (!t) 2060 if (!t)
2101 return NULL; 2061 return NULL;
@@ -2119,7 +2079,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2119 2079
2120static void trie_collect_stats(struct trie *t, struct trie_stat *s) 2080static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2121{ 2081{
2122 struct node *n; 2082 struct rt_trie_node *n;
2123 struct fib_trie_iter iter; 2083 struct fib_trie_iter iter;
2124 2084
2125 memset(s, 0, sizeof(*s)); 2085 memset(s, 0, sizeof(*s));
@@ -2159,7 +2119,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2159 */ 2119 */
2160static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 2120static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2161{ 2121{
2162 unsigned i, max, pointers, bytes, avdepth; 2122 unsigned int i, max, pointers, bytes, avdepth;
2163 2123
2164 if (stat->leaves) 2124 if (stat->leaves)
2165 avdepth = stat->totdepth*100 / stat->leaves; 2125 avdepth = stat->totdepth*100 / stat->leaves;
@@ -2192,7 +2152,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2192 seq_putc(seq, '\n'); 2152 seq_putc(seq, '\n');
2193 seq_printf(seq, "\tPointers: %u\n", pointers); 2153 seq_printf(seq, "\tPointers: %u\n", pointers);
2194 2154
2195 bytes += sizeof(struct node *) * pointers; 2155 bytes += sizeof(struct rt_trie_node *) * pointers;
2196 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); 2156 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2197 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); 2157 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2198} 2158}
@@ -2273,7 +2233,7 @@ static const struct file_operations fib_triestat_fops = {
2273 .release = single_release_net, 2233 .release = single_release_net,
2274}; 2234};
2275 2235
2276static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) 2236static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2277{ 2237{
2278 struct fib_trie_iter *iter = seq->private; 2238 struct fib_trie_iter *iter = seq->private;
2279 struct net *net = seq_file_net(seq); 2239 struct net *net = seq_file_net(seq);
@@ -2286,7 +2246,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2286 struct fib_table *tb; 2246 struct fib_table *tb;
2287 2247
2288 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 2248 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2289 struct node *n; 2249 struct rt_trie_node *n;
2290 2250
2291 for (n = fib_trie_get_first(iter, 2251 for (n = fib_trie_get_first(iter,
2292 (struct trie *) tb->tb_data); 2252 (struct trie *) tb->tb_data);
@@ -2315,7 +2275,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2315 struct fib_table *tb = iter->tb; 2275 struct fib_table *tb = iter->tb;
2316 struct hlist_node *tb_node; 2276 struct hlist_node *tb_node;
2317 unsigned int h; 2277 unsigned int h;
2318 struct node *n; 2278 struct rt_trie_node *n;
2319 2279
2320 ++*pos; 2280 ++*pos;
2321 /* next node in same table */ 2281 /* next node in same table */
@@ -2325,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2325 2285
2326 /* walk rest of this hash chain */ 2286 /* walk rest of this hash chain */
2327 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); 2287 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2328 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { 2288 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2329 tb = hlist_entry(tb_node, struct fib_table, tb_hlist); 2289 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2330 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); 2290 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2331 if (n) 2291 if (n)
@@ -2356,7 +2316,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2356 2316
2357static void seq_indent(struct seq_file *seq, int n) 2317static void seq_indent(struct seq_file *seq, int n)
2358{ 2318{
2359 while (n-- > 0) seq_puts(seq, " "); 2319 while (n-- > 0)
2320 seq_puts(seq, " ");
2360} 2321}
2361 2322
2362static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2323static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2349,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
2388 [RTN_XRESOLVE] = "XRESOLVE", 2349 [RTN_XRESOLVE] = "XRESOLVE",
2389}; 2350};
2390 2351
2391static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2352static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2392{ 2353{
2393 if (t < __RTN_MAX && rtn_type_names[t]) 2354 if (t < __RTN_MAX && rtn_type_names[t])
2394 return rtn_type_names[t]; 2355 return rtn_type_names[t];
@@ -2400,7 +2361,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned t)
2400static int fib_trie_seq_show(struct seq_file *seq, void *v) 2361static int fib_trie_seq_show(struct seq_file *seq, void *v)
2401{ 2362{
2402 const struct fib_trie_iter *iter = seq->private; 2363 const struct fib_trie_iter *iter = seq->private;
2403 struct node *n = v; 2364 struct rt_trie_node *n = v;
2404 2365
2405 if (!node_parent_rcu(n)) 2366 if (!node_parent_rcu(n))
2406 fib_table_print(seq, iter->tb); 2367 fib_table_print(seq, iter->tb);
@@ -2432,7 +2393,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2432 seq_indent(seq, iter->depth+1); 2393 seq_indent(seq, iter->depth+1);
2433 seq_printf(seq, " /%d %s %s", li->plen, 2394 seq_printf(seq, " /%d %s %s", li->plen,
2434 rtn_scope(buf1, sizeof(buf1), 2395 rtn_scope(buf1, sizeof(buf1),
2435 fa->fa_scope), 2396 fa->fa_info->fib_scope),
2436 rtn_type(buf2, sizeof(buf2), 2397 rtn_type(buf2, sizeof(buf2),
2437 fa->fa_type)); 2398 fa->fa_type));
2438 if (fa->fa_tos) 2399 if (fa->fa_tos)
@@ -2544,13 +2505,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
2544 rcu_read_unlock(); 2505 rcu_read_unlock();
2545} 2506}
2546 2507
2547static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2508static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2548{ 2509{
2549 static unsigned type2flags[RTN_MAX + 1] = { 2510 unsigned int flags = 0;
2550 [7] = RTF_REJECT, [8] = RTF_REJECT,
2551 };
2552 unsigned flags = type2flags[type];
2553 2511
2512 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2513 flags = RTF_REJECT;
2554 if (fi && fi->fib_nh->nh_gw) 2514 if (fi && fi->fib_nh->nh_gw)
2555 flags |= RTF_GATEWAY; 2515 flags |= RTF_GATEWAY;
2556 if (mask == htonl(0xFFFFFFFF)) 2516 if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2522,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2562/* 2522/*
2563 * This outputs /proc/net/route. 2523 * This outputs /proc/net/route.
2564 * The format of the file is not supposed to be changed 2524 * The format of the file is not supposed to be changed
2565 * and needs to be same as fib_hash output to avoid breaking 2525 * and needs to be same as fib_hash output to avoid breaking
2566 * legacy utilities 2526 * legacy utilities
2567 */ 2527 */
2568static int fib_route_seq_show(struct seq_file *seq, void *v) 2528static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2547,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2587 2547
2588 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2548 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2589 const struct fib_info *fi = fa->fa_info; 2549 const struct fib_info *fi = fa->fa_info;
2590 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2550 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2591 int len; 2551 int len;
2592 2552
2593 if (fa->fa_type == RTN_BROADCAST 2553 if (fa->fa_type == RTN_BROADCAST