diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r-- | net/ipv4/fib_trie.c | 458 |
1 files changed, 209 insertions, 249 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4a8e370862bc..58c25ea5a5c1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -12,11 +12,11 @@ | |||
12 | * | 12 | * |
13 | * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet | 13 | * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet |
14 | * | 14 | * |
15 | * This work is based on the LPC-trie which is originally descibed in: | 15 | * This work is based on the LPC-trie which is originally described in: |
16 | * | 16 | * |
17 | * An experimental study of compression methods for dynamic tries | 17 | * An experimental study of compression methods for dynamic tries |
18 | * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | 18 | * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. |
19 | * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ | 19 | * http://www.csc.kth.se/~snilsson/software/dyntrie2/ |
20 | * | 20 | * |
21 | * | 21 | * |
22 | * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson | 22 | * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson |
@@ -72,6 +72,7 @@ | |||
72 | #include <linux/init.h> | 72 | #include <linux/init.h> |
73 | #include <linux/list.h> | 73 | #include <linux/list.h> |
74 | #include <linux/slab.h> | 74 | #include <linux/slab.h> |
75 | #include <linux/prefetch.h> | ||
75 | #include <net/net_namespace.h> | 76 | #include <net/net_namespace.h> |
76 | #include <net/ip.h> | 77 | #include <net/ip.h> |
77 | #include <net/protocol.h> | 78 | #include <net/protocol.h> |
@@ -95,7 +96,7 @@ typedef unsigned int t_key; | |||
95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | 96 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) |
96 | #define IS_LEAF(n) (n->parent & T_LEAF) | 97 | #define IS_LEAF(n) (n->parent & T_LEAF) |
97 | 98 | ||
98 | struct node { | 99 | struct rt_trie_node { |
99 | unsigned long parent; | 100 | unsigned long parent; |
100 | t_key key; | 101 | t_key key; |
101 | }; | 102 | }; |
@@ -126,7 +127,7 @@ struct tnode { | |||
126 | struct work_struct work; | 127 | struct work_struct work; |
127 | struct tnode *tnode_free; | 128 | struct tnode *tnode_free; |
128 | }; | 129 | }; |
129 | struct node *child[0]; | 130 | struct rt_trie_node __rcu *child[0]; |
130 | }; | 131 | }; |
131 | 132 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 133 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,16 +152,16 @@ struct trie_stat { | |||
151 | }; | 152 | }; |
152 | 153 | ||
153 | struct trie { | 154 | struct trie { |
154 | struct node *trie; | 155 | struct rt_trie_node __rcu *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 156 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 157 | struct trie_use_stats stats; |
157 | #endif | 158 | #endif |
158 | }; | 159 | }; |
159 | 160 | ||
160 | static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); | 161 | static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); |
161 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 162 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
162 | int wasfull); | 163 | int wasfull); |
163 | static struct node *resize(struct trie *t, struct tnode *tn); | 164 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); |
164 | static struct tnode *inflate(struct trie *t, struct tnode *tn); | 165 | static struct tnode *inflate(struct trie *t, struct tnode *tn); |
165 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 166 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
166 | /* tnodes to free after resize(); protected by RTNL */ | 167 | /* tnodes to free after resize(); protected by RTNL */ |
@@ -177,43 +178,58 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 178 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 179 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 180 | ||
180 | static inline struct tnode *node_parent(struct node *node) | 181 | /* |
182 | * caller must hold RTNL | ||
183 | */ | ||
184 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
181 | { | 185 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 186 | unsigned long parent; |
187 | |||
188 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | ||
189 | |||
190 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
183 | } | 191 | } |
184 | 192 | ||
185 | static inline struct tnode *node_parent_rcu(struct node *node) | 193 | /* |
194 | * caller must hold RCU read lock or RTNL | ||
195 | */ | ||
196 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
186 | { | 197 | { |
187 | struct tnode *ret = node_parent(node); | 198 | unsigned long parent; |
199 | |||
200 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
201 | lockdep_rtnl_is_held()); | ||
188 | 202 | ||
189 | return rcu_dereference_check(ret, | 203 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
190 | rcu_read_lock_held() || | ||
191 | lockdep_rtnl_is_held()); | ||
192 | } | 204 | } |
193 | 205 | ||
194 | /* Same as rcu_assign_pointer | 206 | /* Same as rcu_assign_pointer |
195 | * but that macro() assumes that value is a pointer. | 207 | * but that macro() assumes that value is a pointer. |
196 | */ | 208 | */ |
197 | static inline void node_set_parent(struct node *node, struct tnode *ptr) | 209 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
198 | { | 210 | { |
199 | smp_wmb(); | 211 | smp_wmb(); |
200 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 212 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
201 | } | 213 | } |
202 | 214 | ||
203 | static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) | 215 | /* |
216 | * caller must hold RTNL | ||
217 | */ | ||
218 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | ||
204 | { | 219 | { |
205 | BUG_ON(i >= 1U << tn->bits); | 220 | BUG_ON(i >= 1U << tn->bits); |
206 | 221 | ||
207 | return tn->child[i]; | 222 | return rtnl_dereference(tn->child[i]); |
208 | } | 223 | } |
209 | 224 | ||
210 | static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 225 | /* |
226 | * caller must hold RCU read lock or RTNL | ||
227 | */ | ||
228 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | ||
211 | { | 229 | { |
212 | struct node *ret = tnode_get_child(tn, i); | 230 | BUG_ON(i >= 1U << tn->bits); |
213 | 231 | ||
214 | return rcu_dereference_check(ret, | 232 | return rcu_dereference_rtnl(tn->child[i]); |
215 | rcu_read_lock_held() || | ||
216 | lockdep_rtnl_is_held()); | ||
217 | } | 233 | } |
218 | 234 | ||
219 | static inline int tnode_child_length(const struct tnode *tn) | 235 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -221,12 +237,12 @@ static inline int tnode_child_length(const struct tnode *tn) | |||
221 | return 1 << tn->bits; | 237 | return 1 << tn->bits; |
222 | } | 238 | } |
223 | 239 | ||
224 | static inline t_key mask_pfx(t_key k, unsigned short l) | 240 | static inline t_key mask_pfx(t_key k, unsigned int l) |
225 | { | 241 | { |
226 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); | 242 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); |
227 | } | 243 | } |
228 | 244 | ||
229 | static inline t_key tkey_extract_bits(t_key a, int offset, int bits) | 245 | static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) |
230 | { | 246 | { |
231 | if (offset < KEYLENGTH) | 247 | if (offset < KEYLENGTH) |
232 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); | 248 | return ((t_key)(a << offset)) >> (KEYLENGTH - bits); |
@@ -354,14 +370,9 @@ static inline void free_leaf(struct leaf *l) | |||
354 | call_rcu_bh(&l->rcu, __leaf_free_rcu); | 370 | call_rcu_bh(&l->rcu, __leaf_free_rcu); |
355 | } | 371 | } |
356 | 372 | ||
357 | static void __leaf_info_free_rcu(struct rcu_head *head) | ||
358 | { | ||
359 | kfree(container_of(head, struct leaf_info, rcu)); | ||
360 | } | ||
361 | |||
362 | static inline void free_leaf_info(struct leaf_info *leaf) | 373 | static inline void free_leaf_info(struct leaf_info *leaf) |
363 | { | 374 | { |
364 | call_rcu(&leaf->rcu, __leaf_info_free_rcu); | 375 | kfree_rcu(leaf, rcu); |
365 | } | 376 | } |
366 | 377 | ||
367 | static struct tnode *tnode_alloc(size_t size) | 378 | static struct tnode *tnode_alloc(size_t size) |
@@ -369,7 +380,7 @@ static struct tnode *tnode_alloc(size_t size) | |||
369 | if (size <= PAGE_SIZE) | 380 | if (size <= PAGE_SIZE) |
370 | return kzalloc(size, GFP_KERNEL); | 381 | return kzalloc(size, GFP_KERNEL); |
371 | else | 382 | else |
372 | return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); | 383 | return vzalloc(size); |
373 | } | 384 | } |
374 | 385 | ||
375 | static void __tnode_vfree(struct work_struct *arg) | 386 | static void __tnode_vfree(struct work_struct *arg) |
@@ -382,7 +393,7 @@ static void __tnode_free_rcu(struct rcu_head *head) | |||
382 | { | 393 | { |
383 | struct tnode *tn = container_of(head, struct tnode, rcu); | 394 | struct tnode *tn = container_of(head, struct tnode, rcu); |
384 | size_t size = sizeof(struct tnode) + | 395 | size_t size = sizeof(struct tnode) + |
385 | (sizeof(struct node *) << tn->bits); | 396 | (sizeof(struct rt_trie_node *) << tn->bits); |
386 | 397 | ||
387 | if (size <= PAGE_SIZE) | 398 | if (size <= PAGE_SIZE) |
388 | kfree(tn); | 399 | kfree(tn); |
@@ -406,7 +417,7 @@ static void tnode_free_safe(struct tnode *tn) | |||
406 | tn->tnode_free = tnode_free_head; | 417 | tn->tnode_free = tnode_free_head; |
407 | tnode_free_head = tn; | 418 | tnode_free_head = tn; |
408 | tnode_free_size += sizeof(struct tnode) + | 419 | tnode_free_size += sizeof(struct tnode) + |
409 | (sizeof(struct node *) << tn->bits); | 420 | (sizeof(struct rt_trie_node *) << tn->bits); |
410 | } | 421 | } |
411 | 422 | ||
412 | static void tnode_free_flush(void) | 423 | static void tnode_free_flush(void) |
@@ -447,7 +458,7 @@ static struct leaf_info *leaf_info_new(int plen) | |||
447 | 458 | ||
448 | static struct tnode *tnode_new(t_key key, int pos, int bits) | 459 | static struct tnode *tnode_new(t_key key, int pos, int bits) |
449 | { | 460 | { |
450 | size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); | 461 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); |
451 | struct tnode *tn = tnode_alloc(sz); | 462 | struct tnode *tn = tnode_alloc(sz); |
452 | 463 | ||
453 | if (tn) { | 464 | if (tn) { |
@@ -459,8 +470,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
459 | tn->empty_children = 1<<bits; | 470 | tn->empty_children = 1<<bits; |
460 | } | 471 | } |
461 | 472 | ||
462 | pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), | 473 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), |
463 | (unsigned long) (sizeof(struct node) << bits)); | 474 | sizeof(struct rt_trie_node) << bits); |
464 | return tn; | 475 | return tn; |
465 | } | 476 | } |
466 | 477 | ||
@@ -469,7 +480,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
469 | * and no bits are skipped. See discussion in dyntree paper p. 6 | 480 | * and no bits are skipped. See discussion in dyntree paper p. 6 |
470 | */ | 481 | */ |
471 | 482 | ||
472 | static inline int tnode_full(const struct tnode *tn, const struct node *n) | 483 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) |
473 | { | 484 | { |
474 | if (n == NULL || IS_LEAF(n)) | 485 | if (n == NULL || IS_LEAF(n)) |
475 | return 0; | 486 | return 0; |
@@ -478,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) | |||
478 | } | 489 | } |
479 | 490 | ||
480 | static inline void put_child(struct trie *t, struct tnode *tn, int i, | 491 | static inline void put_child(struct trie *t, struct tnode *tn, int i, |
481 | struct node *n) | 492 | struct rt_trie_node *n) |
482 | { | 493 | { |
483 | tnode_put_child_reorg(tn, i, n, -1); | 494 | tnode_put_child_reorg(tn, i, n, -1); |
484 | } | 495 | } |
@@ -488,10 +499,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
488 | * Update the value of full_children and empty_children. | 499 | * Update the value of full_children and empty_children. |
489 | */ | 500 | */ |
490 | 501 | ||
491 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | 502 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
492 | int wasfull) | 503 | int wasfull) |
493 | { | 504 | { |
494 | struct node *chi = tn->child[i]; | 505 | struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); |
495 | int isfull; | 506 | int isfull; |
496 | 507 | ||
497 | BUG_ON(i >= 1<<tn->bits); | 508 | BUG_ON(i >= 1<<tn->bits); |
@@ -519,7 +530,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
519 | } | 530 | } |
520 | 531 | ||
521 | #define MAX_WORK 10 | 532 | #define MAX_WORK 10 |
522 | static struct node *resize(struct trie *t, struct tnode *tn) | 533 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) |
523 | { | 534 | { |
524 | int i; | 535 | int i; |
525 | struct tnode *old_tn; | 536 | struct tnode *old_tn; |
@@ -609,11 +620,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
609 | 620 | ||
610 | /* Keep root node larger */ | 621 | /* Keep root node larger */ |
611 | 622 | ||
612 | if (!node_parent((struct node*) tn)) { | 623 | if (!node_parent((struct rt_trie_node *)tn)) { |
613 | inflate_threshold_use = inflate_threshold_root; | 624 | inflate_threshold_use = inflate_threshold_root; |
614 | halve_threshold_use = halve_threshold_root; | 625 | halve_threshold_use = halve_threshold_root; |
615 | } | 626 | } else { |
616 | else { | ||
617 | inflate_threshold_use = inflate_threshold; | 627 | inflate_threshold_use = inflate_threshold; |
618 | halve_threshold_use = halve_threshold; | 628 | halve_threshold_use = halve_threshold; |
619 | } | 629 | } |
@@ -639,8 +649,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
639 | check_tnode(tn); | 649 | check_tnode(tn); |
640 | 650 | ||
641 | /* Return if at least one inflate is run */ | 651 | /* Return if at least one inflate is run */ |
642 | if( max_work != MAX_WORK) | 652 | if (max_work != MAX_WORK) |
643 | return (struct node *) tn; | 653 | return (struct rt_trie_node *) tn; |
644 | 654 | ||
645 | /* | 655 | /* |
646 | * Halve as long as the number of empty children in this | 656 | * Halve as long as the number of empty children in this |
@@ -668,9 +678,9 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
668 | if (tn->empty_children == tnode_child_length(tn) - 1) { | 678 | if (tn->empty_children == tnode_child_length(tn) - 1) { |
669 | one_child: | 679 | one_child: |
670 | for (i = 0; i < tnode_child_length(tn); i++) { | 680 | for (i = 0; i < tnode_child_length(tn); i++) { |
671 | struct node *n; | 681 | struct rt_trie_node *n; |
672 | 682 | ||
673 | n = tn->child[i]; | 683 | n = rtnl_dereference(tn->child[i]); |
674 | if (!n) | 684 | if (!n) |
675 | continue; | 685 | continue; |
676 | 686 | ||
@@ -681,7 +691,21 @@ one_child: | |||
681 | return n; | 691 | return n; |
682 | } | 692 | } |
683 | } | 693 | } |
684 | return (struct node *) tn; | 694 | return (struct rt_trie_node *) tn; |
695 | } | ||
696 | |||
697 | |||
698 | static void tnode_clean_free(struct tnode *tn) | ||
699 | { | ||
700 | int i; | ||
701 | struct tnode *tofree; | ||
702 | |||
703 | for (i = 0; i < tnode_child_length(tn); i++) { | ||
704 | tofree = (struct tnode *)rtnl_dereference(tn->child[i]); | ||
705 | if (tofree) | ||
706 | tnode_free(tofree); | ||
707 | } | ||
708 | tnode_free(tn); | ||
685 | } | 709 | } |
686 | 710 | ||
687 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 711 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
@@ -728,14 +752,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
728 | goto nomem; | 752 | goto nomem; |
729 | } | 753 | } |
730 | 754 | ||
731 | put_child(t, tn, 2*i, (struct node *) left); | 755 | put_child(t, tn, 2*i, (struct rt_trie_node *) left); |
732 | put_child(t, tn, 2*i+1, (struct node *) right); | 756 | put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); |
733 | } | 757 | } |
734 | } | 758 | } |
735 | 759 | ||
736 | for (i = 0; i < olen; i++) { | 760 | for (i = 0; i < olen; i++) { |
737 | struct tnode *inode; | 761 | struct tnode *inode; |
738 | struct node *node = tnode_get_child(oldtnode, i); | 762 | struct rt_trie_node *node = tnode_get_child(oldtnode, i); |
739 | struct tnode *left, *right; | 763 | struct tnode *left, *right; |
740 | int size, j; | 764 | int size, j; |
741 | 765 | ||
@@ -760,8 +784,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
760 | inode = (struct tnode *) node; | 784 | inode = (struct tnode *) node; |
761 | 785 | ||
762 | if (inode->bits == 1) { | 786 | if (inode->bits == 1) { |
763 | put_child(t, tn, 2*i, inode->child[0]); | 787 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); |
764 | put_child(t, tn, 2*i+1, inode->child[1]); | 788 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); |
765 | 789 | ||
766 | tnode_free_safe(inode); | 790 | tnode_free_safe(inode); |
767 | continue; | 791 | continue; |
@@ -802,8 +826,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
802 | 826 | ||
803 | size = tnode_child_length(left); | 827 | size = tnode_child_length(left); |
804 | for (j = 0; j < size; j++) { | 828 | for (j = 0; j < size; j++) { |
805 | put_child(t, left, j, inode->child[j]); | 829 | put_child(t, left, j, rtnl_dereference(inode->child[j])); |
806 | put_child(t, right, j, inode->child[j + size]); | 830 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); |
807 | } | 831 | } |
808 | put_child(t, tn, 2*i, resize(t, left)); | 832 | put_child(t, tn, 2*i, resize(t, left)); |
809 | put_child(t, tn, 2*i+1, resize(t, right)); | 833 | put_child(t, tn, 2*i+1, resize(t, right)); |
@@ -813,24 +837,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
813 | tnode_free_safe(oldtnode); | 837 | tnode_free_safe(oldtnode); |
814 | return tn; | 838 | return tn; |
815 | nomem: | 839 | nomem: |
816 | { | 840 | tnode_clean_free(tn); |
817 | int size = tnode_child_length(tn); | 841 | return ERR_PTR(-ENOMEM); |
818 | int j; | ||
819 | |||
820 | for (j = 0; j < size; j++) | ||
821 | if (tn->child[j]) | ||
822 | tnode_free((struct tnode *)tn->child[j]); | ||
823 | |||
824 | tnode_free(tn); | ||
825 | |||
826 | return ERR_PTR(-ENOMEM); | ||
827 | } | ||
828 | } | 842 | } |
829 | 843 | ||
830 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 844 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
831 | { | 845 | { |
832 | struct tnode *oldtnode = tn; | 846 | struct tnode *oldtnode = tn; |
833 | struct node *left, *right; | 847 | struct rt_trie_node *left, *right; |
834 | int i; | 848 | int i; |
835 | int olen = tnode_child_length(tn); | 849 | int olen = tnode_child_length(tn); |
836 | 850 | ||
@@ -861,7 +875,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
861 | if (!newn) | 875 | if (!newn) |
862 | goto nomem; | 876 | goto nomem; |
863 | 877 | ||
864 | put_child(t, tn, i/2, (struct node *)newn); | 878 | put_child(t, tn, i/2, (struct rt_trie_node *)newn); |
865 | } | 879 | } |
866 | 880 | ||
867 | } | 881 | } |
@@ -895,18 +909,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
895 | tnode_free_safe(oldtnode); | 909 | tnode_free_safe(oldtnode); |
896 | return tn; | 910 | return tn; |
897 | nomem: | 911 | nomem: |
898 | { | 912 | tnode_clean_free(tn); |
899 | int size = tnode_child_length(tn); | 913 | return ERR_PTR(-ENOMEM); |
900 | int j; | ||
901 | |||
902 | for (j = 0; j < size; j++) | ||
903 | if (tn->child[j]) | ||
904 | tnode_free((struct tnode *)tn->child[j]); | ||
905 | |||
906 | tnode_free(tn); | ||
907 | |||
908 | return ERR_PTR(-ENOMEM); | ||
909 | } | ||
910 | } | 914 | } |
911 | 915 | ||
912 | /* readside must use rcu_read_lock currently dump routines | 916 | /* readside must use rcu_read_lock currently dump routines |
@@ -963,12 +967,10 @@ fib_find_node(struct trie *t, u32 key) | |||
963 | { | 967 | { |
964 | int pos; | 968 | int pos; |
965 | struct tnode *tn; | 969 | struct tnode *tn; |
966 | struct node *n; | 970 | struct rt_trie_node *n; |
967 | 971 | ||
968 | pos = 0; | 972 | pos = 0; |
969 | n = rcu_dereference_check(t->trie, | 973 | n = rcu_dereference_rtnl(t->trie); |
970 | rcu_read_lock_held() || | ||
971 | lockdep_rtnl_is_held()); | ||
972 | 974 | ||
973 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 975 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { |
974 | tn = (struct tnode *) n; | 976 | tn = (struct tnode *) n; |
@@ -1000,17 +1002,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1000 | 1002 | ||
1001 | key = tn->key; | 1003 | key = tn->key; |
1002 | 1004 | ||
1003 | while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { | 1005 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { |
1004 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1006 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1005 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 1007 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
1006 | tn = (struct tnode *) resize(t, (struct tnode *)tn); | 1008 | tn = (struct tnode *) resize(t, (struct tnode *)tn); |
1007 | 1009 | ||
1008 | tnode_put_child_reorg((struct tnode *)tp, cindex, | 1010 | tnode_put_child_reorg((struct tnode *)tp, cindex, |
1009 | (struct node *)tn, wasfull); | 1011 | (struct rt_trie_node *)tn, wasfull); |
1010 | 1012 | ||
1011 | tp = node_parent((struct node *) tn); | 1013 | tp = node_parent((struct rt_trie_node *) tn); |
1012 | if (!tp) | 1014 | if (!tp) |
1013 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1015 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1014 | 1016 | ||
1015 | tnode_free_flush(); | 1017 | tnode_free_flush(); |
1016 | if (!tp) | 1018 | if (!tp) |
@@ -1022,7 +1024,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
1022 | if (IS_TNODE(tn)) | 1024 | if (IS_TNODE(tn)) |
1023 | tn = (struct tnode *)resize(t, (struct tnode *)tn); | 1025 | tn = (struct tnode *)resize(t, (struct tnode *)tn); |
1024 | 1026 | ||
1025 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1027 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1026 | tnode_free_flush(); | 1028 | tnode_free_flush(); |
1027 | } | 1029 | } |
1028 | 1030 | ||
@@ -1032,7 +1034,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1032 | { | 1034 | { |
1033 | int pos, newpos; | 1035 | int pos, newpos; |
1034 | struct tnode *tp = NULL, *tn = NULL; | 1036 | struct tnode *tp = NULL, *tn = NULL; |
1035 | struct node *n; | 1037 | struct rt_trie_node *n; |
1036 | struct leaf *l; | 1038 | struct leaf *l; |
1037 | int missbit; | 1039 | int missbit; |
1038 | struct list_head *fa_head = NULL; | 1040 | struct list_head *fa_head = NULL; |
@@ -1040,7 +1042,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1040 | t_key cindex; | 1042 | t_key cindex; |
1041 | 1043 | ||
1042 | pos = 0; | 1044 | pos = 0; |
1043 | n = t->trie; | 1045 | n = rtnl_dereference(t->trie); |
1044 | 1046 | ||
1045 | /* If we point to NULL, stop. Either the tree is empty and we should | 1047 | /* If we point to NULL, stop. Either the tree is empty and we should |
1046 | * just put a new leaf in if, or we have reached an empty child slot, | 1048 | * just put a new leaf in if, or we have reached an empty child slot, |
@@ -1118,10 +1120,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1118 | if (t->trie && n == NULL) { | 1120 | if (t->trie && n == NULL) { |
1119 | /* Case 2: n is NULL, and will just insert a new leaf */ | 1121 | /* Case 2: n is NULL, and will just insert a new leaf */ |
1120 | 1122 | ||
1121 | node_set_parent((struct node *)l, tp); | 1123 | node_set_parent((struct rt_trie_node *)l, tp); |
1122 | 1124 | ||
1123 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1125 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1124 | put_child(t, (struct tnode *)tp, cindex, (struct node *)l); | 1126 | put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); |
1125 | } else { | 1127 | } else { |
1126 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1128 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
1127 | /* | 1129 | /* |
@@ -1148,18 +1150,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1148 | return NULL; | 1150 | return NULL; |
1149 | } | 1151 | } |
1150 | 1152 | ||
1151 | node_set_parent((struct node *)tn, tp); | 1153 | node_set_parent((struct rt_trie_node *)tn, tp); |
1152 | 1154 | ||
1153 | missbit = tkey_extract_bits(key, newpos, 1); | 1155 | missbit = tkey_extract_bits(key, newpos, 1); |
1154 | put_child(t, tn, missbit, (struct node *)l); | 1156 | put_child(t, tn, missbit, (struct rt_trie_node *)l); |
1155 | put_child(t, tn, 1-missbit, n); | 1157 | put_child(t, tn, 1-missbit, n); |
1156 | 1158 | ||
1157 | if (tp) { | 1159 | if (tp) { |
1158 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1160 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
1159 | put_child(t, (struct tnode *)tp, cindex, | 1161 | put_child(t, (struct tnode *)tp, cindex, |
1160 | (struct node *)tn); | 1162 | (struct rt_trie_node *)tn); |
1161 | } else { | 1163 | } else { |
1162 | rcu_assign_pointer(t->trie, (struct node *)tn); | 1164 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1163 | tp = tn; | 1165 | tp = tn; |
1164 | } | 1166 | } |
1165 | } | 1167 | } |
@@ -1252,7 +1254,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1252 | if (fa->fa_info->fib_priority != fi->fib_priority) | 1254 | if (fa->fa_info->fib_priority != fi->fib_priority) |
1253 | break; | 1255 | break; |
1254 | if (fa->fa_type == cfg->fc_type && | 1256 | if (fa->fa_type == cfg->fc_type && |
1255 | fa->fa_scope == cfg->fc_scope && | ||
1256 | fa->fa_info == fi) { | 1257 | fa->fa_info == fi) { |
1257 | fa_match = fa; | 1258 | fa_match = fa; |
1258 | break; | 1259 | break; |
@@ -1278,7 +1279,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1278 | new_fa->fa_tos = fa->fa_tos; | 1279 | new_fa->fa_tos = fa->fa_tos; |
1279 | new_fa->fa_info = fi; | 1280 | new_fa->fa_info = fi; |
1280 | new_fa->fa_type = cfg->fc_type; | 1281 | new_fa->fa_type = cfg->fc_type; |
1281 | new_fa->fa_scope = cfg->fc_scope; | ||
1282 | state = fa->fa_state; | 1282 | state = fa->fa_state; |
1283 | new_fa->fa_state = state & ~FA_S_ACCESSED; | 1283 | new_fa->fa_state = state & ~FA_S_ACCESSED; |
1284 | 1284 | ||
@@ -1315,7 +1315,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1315 | new_fa->fa_info = fi; | 1315 | new_fa->fa_info = fi; |
1316 | new_fa->fa_tos = tos; | 1316 | new_fa->fa_tos = tos; |
1317 | new_fa->fa_type = cfg->fc_type; | 1317 | new_fa->fa_type = cfg->fc_type; |
1318 | new_fa->fa_scope = cfg->fc_scope; | ||
1319 | new_fa->fa_state = 0; | 1318 | new_fa->fa_state = 0; |
1320 | /* | 1319 | /* |
1321 | * Insert new entry to the list. | 1320 | * Insert new entry to the list. |
@@ -1329,6 +1328,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1329 | } | 1328 | } |
1330 | } | 1329 | } |
1331 | 1330 | ||
1331 | if (!plen) | ||
1332 | tb->tb_num_default++; | ||
1333 | |||
1332 | list_add_tail_rcu(&new_fa->fa_list, | 1334 | list_add_tail_rcu(&new_fa->fa_list, |
1333 | (fa ? &fa->fa_list : fa_head)); | 1335 | (fa ? &fa->fa_list : fa_head)); |
1334 | 1336 | ||
@@ -1347,52 +1349,86 @@ err: | |||
1347 | } | 1349 | } |
1348 | 1350 | ||
1349 | /* should be called with rcu_read_lock */ | 1351 | /* should be called with rcu_read_lock */ |
1350 | static int check_leaf(struct trie *t, struct leaf *l, | 1352 | static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, |
1351 | t_key key, const struct flowi *flp, | 1353 | t_key key, const struct flowi4 *flp, |
1352 | struct fib_result *res) | 1354 | struct fib_result *res, int fib_flags) |
1353 | { | 1355 | { |
1354 | struct leaf_info *li; | 1356 | struct leaf_info *li; |
1355 | struct hlist_head *hhead = &l->list; | 1357 | struct hlist_head *hhead = &l->list; |
1356 | struct hlist_node *node; | 1358 | struct hlist_node *node; |
1357 | 1359 | ||
1358 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { | 1360 | hlist_for_each_entry_rcu(li, node, hhead, hlist) { |
1359 | int err; | 1361 | struct fib_alias *fa; |
1360 | int plen = li->plen; | 1362 | int plen = li->plen; |
1361 | __be32 mask = inet_make_mask(plen); | 1363 | __be32 mask = inet_make_mask(plen); |
1362 | 1364 | ||
1363 | if (l->key != (key & ntohl(mask))) | 1365 | if (l->key != (key & ntohl(mask))) |
1364 | continue; | 1366 | continue; |
1365 | 1367 | ||
1366 | err = fib_semantic_match(&li->falh, flp, res, plen); | 1368 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
1369 | struct fib_info *fi = fa->fa_info; | ||
1370 | int nhsel, err; | ||
1367 | 1371 | ||
1372 | if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) | ||
1373 | continue; | ||
1374 | if (fa->fa_info->fib_scope < flp->flowi4_scope) | ||
1375 | continue; | ||
1376 | fib_alias_accessed(fa); | ||
1377 | err = fib_props[fa->fa_type].error; | ||
1378 | if (err) { | ||
1368 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1379 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
1369 | if (err <= 0) | 1380 | t->stats.semantic_match_passed++; |
1370 | t->stats.semantic_match_passed++; | 1381 | #endif |
1371 | else | 1382 | return err; |
1372 | t->stats.semantic_match_miss++; | 1383 | } |
1384 | if (fi->fib_flags & RTNH_F_DEAD) | ||
1385 | continue; | ||
1386 | for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { | ||
1387 | const struct fib_nh *nh = &fi->fib_nh[nhsel]; | ||
1388 | |||
1389 | if (nh->nh_flags & RTNH_F_DEAD) | ||
1390 | continue; | ||
1391 | if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) | ||
1392 | continue; | ||
1393 | |||
1394 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
1395 | t->stats.semantic_match_passed++; | ||
1396 | #endif | ||
1397 | res->prefixlen = plen; | ||
1398 | res->nh_sel = nhsel; | ||
1399 | res->type = fa->fa_type; | ||
1400 | res->scope = fa->fa_info->fib_scope; | ||
1401 | res->fi = fi; | ||
1402 | res->table = tb; | ||
1403 | res->fa_head = &li->falh; | ||
1404 | if (!(fib_flags & FIB_LOOKUP_NOREF)) | ||
1405 | atomic_inc(&res->fi->fib_clntref); | ||
1406 | return 0; | ||
1407 | } | ||
1408 | } | ||
1409 | |||
1410 | #ifdef CONFIG_IP_FIB_TRIE_STATS | ||
1411 | t->stats.semantic_match_miss++; | ||
1373 | #endif | 1412 | #endif |
1374 | if (err <= 0) | ||
1375 | return err; | ||
1376 | } | 1413 | } |
1377 | 1414 | ||
1378 | return 1; | 1415 | return 1; |
1379 | } | 1416 | } |
1380 | 1417 | ||
1381 | int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | 1418 | int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, |
1382 | struct fib_result *res) | 1419 | struct fib_result *res, int fib_flags) |
1383 | { | 1420 | { |
1384 | struct trie *t = (struct trie *) tb->tb_data; | 1421 | struct trie *t = (struct trie *) tb->tb_data; |
1385 | int ret; | 1422 | int ret; |
1386 | struct node *n; | 1423 | struct rt_trie_node *n; |
1387 | struct tnode *pn; | 1424 | struct tnode *pn; |
1388 | int pos, bits; | 1425 | unsigned int pos, bits; |
1389 | t_key key = ntohl(flp->fl4_dst); | 1426 | t_key key = ntohl(flp->daddr); |
1390 | int chopped_off; | 1427 | unsigned int chopped_off; |
1391 | t_key cindex = 0; | 1428 | t_key cindex = 0; |
1392 | int current_prefix_length = KEYLENGTH; | 1429 | unsigned int current_prefix_length = KEYLENGTH; |
1393 | struct tnode *cn; | 1430 | struct tnode *cn; |
1394 | t_key node_prefix, key_prefix, pref_mismatch; | 1431 | t_key pref_mismatch; |
1395 | int mp; | ||
1396 | 1432 | ||
1397 | rcu_read_lock(); | 1433 | rcu_read_lock(); |
1398 | 1434 | ||
@@ -1406,7 +1442,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1406 | 1442 | ||
1407 | /* Just a leaf? */ | 1443 | /* Just a leaf? */ |
1408 | if (IS_LEAF(n)) { | 1444 | if (IS_LEAF(n)) { |
1409 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); | 1445 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1410 | goto found; | 1446 | goto found; |
1411 | } | 1447 | } |
1412 | 1448 | ||
@@ -1431,7 +1467,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1431 | } | 1467 | } |
1432 | 1468 | ||
1433 | if (IS_LEAF(n)) { | 1469 | if (IS_LEAF(n)) { |
1434 | ret = check_leaf(t, (struct leaf *)n, key, flp, res); | 1470 | ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); |
1435 | if (ret > 0) | 1471 | if (ret > 0) |
1436 | goto backtrace; | 1472 | goto backtrace; |
1437 | goto found; | 1473 | goto found; |
@@ -1507,10 +1543,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1507 | * matching prefix. | 1543 | * matching prefix. |
1508 | */ | 1544 | */ |
1509 | 1545 | ||
1510 | node_prefix = mask_pfx(cn->key, cn->pos); | 1546 | pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); |
1511 | key_prefix = mask_pfx(key, cn->pos); | ||
1512 | pref_mismatch = key_prefix^node_prefix; | ||
1513 | mp = 0; | ||
1514 | 1547 | ||
1515 | /* | 1548 | /* |
1516 | * In short: If skipped bits in this node do not match | 1549 | * In short: If skipped bits in this node do not match |
@@ -1518,13 +1551,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1518 | * state.directly. | 1551 | * state.directly. |
1519 | */ | 1552 | */ |
1520 | if (pref_mismatch) { | 1553 | if (pref_mismatch) { |
1521 | while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { | 1554 | int mp = KEYLENGTH - fls(pref_mismatch); |
1522 | mp++; | ||
1523 | pref_mismatch = pref_mismatch << 1; | ||
1524 | } | ||
1525 | key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp); | ||
1526 | 1555 | ||
1527 | if (key_prefix != 0) | 1556 | if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0) |
1528 | goto backtrace; | 1557 | goto backtrace; |
1529 | 1558 | ||
1530 | if (current_prefix_length >= cn->pos) | 1559 | if (current_prefix_length >= cn->pos) |
@@ -1556,7 +1585,7 @@ backtrace: | |||
1556 | if (chopped_off <= pn->bits) { | 1585 | if (chopped_off <= pn->bits) { |
1557 | cindex &= ~(1 << (chopped_off-1)); | 1586 | cindex &= ~(1 << (chopped_off-1)); |
1558 | } else { | 1587 | } else { |
1559 | struct tnode *parent = node_parent_rcu((struct node *) pn); | 1588 | struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); |
1560 | if (!parent) | 1589 | if (!parent) |
1561 | goto failed; | 1590 | goto failed; |
1562 | 1591 | ||
@@ -1583,7 +1612,7 @@ found: | |||
1583 | */ | 1612 | */ |
1584 | static void trie_leaf_remove(struct trie *t, struct leaf *l) | 1613 | static void trie_leaf_remove(struct trie *t, struct leaf *l) |
1585 | { | 1614 | { |
1586 | struct tnode *tp = node_parent((struct node *) l); | 1615 | struct tnode *tp = node_parent((struct rt_trie_node *) l); |
1587 | 1616 | ||
1588 | pr_debug("entering trie_leaf_remove(%p)\n", l); | 1617 | pr_debug("entering trie_leaf_remove(%p)\n", l); |
1589 | 1618 | ||
@@ -1644,7 +1673,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1644 | 1673 | ||
1645 | if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && | 1674 | if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && |
1646 | (cfg->fc_scope == RT_SCOPE_NOWHERE || | 1675 | (cfg->fc_scope == RT_SCOPE_NOWHERE || |
1647 | fa->fa_scope == cfg->fc_scope) && | 1676 | fa->fa_info->fib_scope == cfg->fc_scope) && |
1677 | (!cfg->fc_prefsrc || | ||
1678 | fi->fib_prefsrc == cfg->fc_prefsrc) && | ||
1648 | (!cfg->fc_protocol || | 1679 | (!cfg->fc_protocol || |
1649 | fi->fib_protocol == cfg->fc_protocol) && | 1680 | fi->fib_protocol == cfg->fc_protocol) && |
1650 | fib_nh_match(cfg, fi) == 0) { | 1681 | fib_nh_match(cfg, fi) == 0) { |
@@ -1665,6 +1696,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1665 | 1696 | ||
1666 | list_del_rcu(&fa->fa_list); | 1697 | list_del_rcu(&fa->fa_list); |
1667 | 1698 | ||
1699 | if (!plen) | ||
1700 | tb->tb_num_default--; | ||
1701 | |||
1668 | if (list_empty(fa_head)) { | 1702 | if (list_empty(fa_head)) { |
1669 | hlist_del_rcu(&li->hlist); | 1703 | hlist_del_rcu(&li->hlist); |
1670 | free_leaf_info(li); | 1704 | free_leaf_info(li); |
@@ -1721,7 +1755,7 @@ static int trie_flush_leaf(struct leaf *l) | |||
1721 | * Scan for the next right leaf starting at node p->child[idx] | 1755 | * Scan for the next right leaf starting at node p->child[idx] |
1722 | * Since we have back pointer, no recursion necessary. | 1756 | * Since we have back pointer, no recursion necessary. |
1723 | */ | 1757 | */ |
1724 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | 1758 | static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) |
1725 | { | 1759 | { |
1726 | do { | 1760 | do { |
1727 | t_key idx; | 1761 | t_key idx; |
@@ -1737,7 +1771,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
1737 | continue; | 1771 | continue; |
1738 | 1772 | ||
1739 | if (IS_LEAF(c)) { | 1773 | if (IS_LEAF(c)) { |
1740 | prefetch(p->child[idx]); | 1774 | prefetch(rcu_dereference_rtnl(p->child[idx])); |
1741 | return (struct leaf *) c; | 1775 | return (struct leaf *) c; |
1742 | } | 1776 | } |
1743 | 1777 | ||
@@ -1747,17 +1781,15 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) | |||
1747 | } | 1781 | } |
1748 | 1782 | ||
1749 | /* Node empty, walk back up to parent */ | 1783 | /* Node empty, walk back up to parent */ |
1750 | c = (struct node *) p; | 1784 | c = (struct rt_trie_node *) p; |
1751 | } while ( (p = node_parent_rcu(c)) != NULL); | 1785 | } while ((p = node_parent_rcu(c)) != NULL); |
1752 | 1786 | ||
1753 | return NULL; /* Root of trie */ | 1787 | return NULL; /* Root of trie */ |
1754 | } | 1788 | } |
1755 | 1789 | ||
1756 | static struct leaf *trie_firstleaf(struct trie *t) | 1790 | static struct leaf *trie_firstleaf(struct trie *t) |
1757 | { | 1791 | { |
1758 | struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, | 1792 | struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie); |
1759 | rcu_read_lock_held() || | ||
1760 | lockdep_rtnl_is_held()); | ||
1761 | 1793 | ||
1762 | if (!n) | 1794 | if (!n) |
1763 | return NULL; | 1795 | return NULL; |
@@ -1770,7 +1802,7 @@ static struct leaf *trie_firstleaf(struct trie *t) | |||
1770 | 1802 | ||
1771 | static struct leaf *trie_nextleaf(struct leaf *l) | 1803 | static struct leaf *trie_nextleaf(struct leaf *l) |
1772 | { | 1804 | { |
1773 | struct node *c = (struct node *) l; | 1805 | struct rt_trie_node *c = (struct rt_trie_node *) l; |
1774 | struct tnode *p = node_parent_rcu(c); | 1806 | struct tnode *p = node_parent_rcu(c); |
1775 | 1807 | ||
1776 | if (!p) | 1808 | if (!p) |
@@ -1814,77 +1846,9 @@ int fib_table_flush(struct fib_table *tb) | |||
1814 | return found; | 1846 | return found; |
1815 | } | 1847 | } |
1816 | 1848 | ||
1817 | void fib_table_select_default(struct fib_table *tb, | 1849 | void fib_free_table(struct fib_table *tb) |
1818 | const struct flowi *flp, | ||
1819 | struct fib_result *res) | ||
1820 | { | 1850 | { |
1821 | struct trie *t = (struct trie *) tb->tb_data; | 1851 | kfree(tb); |
1822 | int order, last_idx; | ||
1823 | struct fib_info *fi = NULL; | ||
1824 | struct fib_info *last_resort; | ||
1825 | struct fib_alias *fa = NULL; | ||
1826 | struct list_head *fa_head; | ||
1827 | struct leaf *l; | ||
1828 | |||
1829 | last_idx = -1; | ||
1830 | last_resort = NULL; | ||
1831 | order = -1; | ||
1832 | |||
1833 | rcu_read_lock(); | ||
1834 | |||
1835 | l = fib_find_node(t, 0); | ||
1836 | if (!l) | ||
1837 | goto out; | ||
1838 | |||
1839 | fa_head = get_fa_head(l, 0); | ||
1840 | if (!fa_head) | ||
1841 | goto out; | ||
1842 | |||
1843 | if (list_empty(fa_head)) | ||
1844 | goto out; | ||
1845 | |||
1846 | list_for_each_entry_rcu(fa, fa_head, fa_list) { | ||
1847 | struct fib_info *next_fi = fa->fa_info; | ||
1848 | |||
1849 | if (fa->fa_scope != res->scope || | ||
1850 | fa->fa_type != RTN_UNICAST) | ||
1851 | continue; | ||
1852 | |||
1853 | if (next_fi->fib_priority > res->fi->fib_priority) | ||
1854 | break; | ||
1855 | if (!next_fi->fib_nh[0].nh_gw || | ||
1856 | next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | ||
1857 | continue; | ||
1858 | fa->fa_state |= FA_S_ACCESSED; | ||
1859 | |||
1860 | if (fi == NULL) { | ||
1861 | if (next_fi != res->fi) | ||
1862 | break; | ||
1863 | } else if (!fib_detect_death(fi, order, &last_resort, | ||
1864 | &last_idx, tb->tb_default)) { | ||
1865 | fib_result_assign(res, fi); | ||
1866 | tb->tb_default = order; | ||
1867 | goto out; | ||
1868 | } | ||
1869 | fi = next_fi; | ||
1870 | order++; | ||
1871 | } | ||
1872 | if (order <= 0 || fi == NULL) { | ||
1873 | tb->tb_default = -1; | ||
1874 | goto out; | ||
1875 | } | ||
1876 | |||
1877 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | ||
1878 | tb->tb_default)) { | ||
1879 | fib_result_assign(res, fi); | ||
1880 | tb->tb_default = order; | ||
1881 | goto out; | ||
1882 | } | ||
1883 | if (last_idx >= 0) | ||
1884 | fib_result_assign(res, last_resort); | ||
1885 | tb->tb_default = last_idx; | ||
1886 | out: | ||
1887 | rcu_read_unlock(); | ||
1888 | } | 1852 | } |
1889 | 1853 | ||
1890 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | 1854 | static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, |
@@ -1911,7 +1875,6 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, | |||
1911 | RTM_NEWROUTE, | 1875 | RTM_NEWROUTE, |
1912 | tb->tb_id, | 1876 | tb->tb_id, |
1913 | fa->fa_type, | 1877 | fa->fa_type, |
1914 | fa->fa_scope, | ||
1915 | xkey, | 1878 | xkey, |
1916 | plen, | 1879 | plen, |
1917 | fa->fa_tos, | 1880 | fa->fa_tos, |
@@ -2001,7 +1964,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, | |||
2001 | return skb->len; | 1964 | return skb->len; |
2002 | } | 1965 | } |
2003 | 1966 | ||
2004 | void __init fib_hash_init(void) | 1967 | void __init fib_trie_init(void) |
2005 | { | 1968 | { |
2006 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", | 1969 | fn_alias_kmem = kmem_cache_create("ip_fib_alias", |
2007 | sizeof(struct fib_alias), | 1970 | sizeof(struct fib_alias), |
@@ -2014,8 +1977,7 @@ void __init fib_hash_init(void) | |||
2014 | } | 1977 | } |
2015 | 1978 | ||
2016 | 1979 | ||
2017 | /* Fix more generic FIB names for init later */ | 1980 | struct fib_table *fib_trie_table(u32 id) |
2018 | struct fib_table *fib_hash_table(u32 id) | ||
2019 | { | 1981 | { |
2020 | struct fib_table *tb; | 1982 | struct fib_table *tb; |
2021 | struct trie *t; | 1983 | struct trie *t; |
@@ -2027,13 +1989,11 @@ struct fib_table *fib_hash_table(u32 id) | |||
2027 | 1989 | ||
2028 | tb->tb_id = id; | 1990 | tb->tb_id = id; |
2029 | tb->tb_default = -1; | 1991 | tb->tb_default = -1; |
1992 | tb->tb_num_default = 0; | ||
2030 | 1993 | ||
2031 | t = (struct trie *) tb->tb_data; | 1994 | t = (struct trie *) tb->tb_data; |
2032 | memset(t, 0, sizeof(*t)); | 1995 | memset(t, 0, sizeof(*t)); |
2033 | 1996 | ||
2034 | if (id == RT_TABLE_LOCAL) | ||
2035 | pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION); | ||
2036 | |||
2037 | return tb; | 1997 | return tb; |
2038 | } | 1998 | } |
2039 | 1999 | ||
@@ -2043,14 +2003,14 @@ struct fib_trie_iter { | |||
2043 | struct seq_net_private p; | 2003 | struct seq_net_private p; |
2044 | struct fib_table *tb; | 2004 | struct fib_table *tb; |
2045 | struct tnode *tnode; | 2005 | struct tnode *tnode; |
2046 | unsigned index; | 2006 | unsigned int index; |
2047 | unsigned depth; | 2007 | unsigned int depth; |
2048 | }; | 2008 | }; |
2049 | 2009 | ||
2050 | static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | 2010 | static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) |
2051 | { | 2011 | { |
2052 | struct tnode *tn = iter->tnode; | 2012 | struct tnode *tn = iter->tnode; |
2053 | unsigned cindex = iter->index; | 2013 | unsigned int cindex = iter->index; |
2054 | struct tnode *p; | 2014 | struct tnode *p; |
2055 | 2015 | ||
2056 | /* A single entry routing table */ | 2016 | /* A single entry routing table */ |
@@ -2061,7 +2021,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) | |||
2061 | iter->tnode, iter->index, iter->depth); | 2021 | iter->tnode, iter->index, iter->depth); |
2062 | rescan: | 2022 | rescan: |
2063 | while (cindex < (1<<tn->bits)) { | 2023 | while (cindex < (1<<tn->bits)) { |
2064 | struct node *n = tnode_get_child_rcu(tn, cindex); | 2024 | struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); |
2065 | 2025 | ||
2066 | if (n) { | 2026 | if (n) { |
2067 | if (IS_LEAF(n)) { | 2027 | if (IS_LEAF(n)) { |
@@ -2080,7 +2040,7 @@ rescan: | |||
2080 | } | 2040 | } |
2081 | 2041 | ||
2082 | /* Current node exhausted, pop back up */ | 2042 | /* Current node exhausted, pop back up */ |
2083 | p = node_parent_rcu((struct node *)tn); | 2043 | p = node_parent_rcu((struct rt_trie_node *)tn); |
2084 | if (p) { | 2044 | if (p) { |
2085 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; | 2045 | cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; |
2086 | tn = p; | 2046 | tn = p; |
@@ -2092,10 +2052,10 @@ rescan: | |||
2092 | return NULL; | 2052 | return NULL; |
2093 | } | 2053 | } |
2094 | 2054 | ||
2095 | static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | 2055 | static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, |
2096 | struct trie *t) | 2056 | struct trie *t) |
2097 | { | 2057 | { |
2098 | struct node *n; | 2058 | struct rt_trie_node *n; |
2099 | 2059 | ||
2100 | if (!t) | 2060 | if (!t) |
2101 | return NULL; | 2061 | return NULL; |
@@ -2119,7 +2079,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, | |||
2119 | 2079 | ||
2120 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) | 2080 | static void trie_collect_stats(struct trie *t, struct trie_stat *s) |
2121 | { | 2081 | { |
2122 | struct node *n; | 2082 | struct rt_trie_node *n; |
2123 | struct fib_trie_iter iter; | 2083 | struct fib_trie_iter iter; |
2124 | 2084 | ||
2125 | memset(s, 0, sizeof(*s)); | 2085 | memset(s, 0, sizeof(*s)); |
@@ -2159,7 +2119,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s) | |||
2159 | */ | 2119 | */ |
2160 | static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) | 2120 | static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) |
2161 | { | 2121 | { |
2162 | unsigned i, max, pointers, bytes, avdepth; | 2122 | unsigned int i, max, pointers, bytes, avdepth; |
2163 | 2123 | ||
2164 | if (stat->leaves) | 2124 | if (stat->leaves) |
2165 | avdepth = stat->totdepth*100 / stat->leaves; | 2125 | avdepth = stat->totdepth*100 / stat->leaves; |
@@ -2192,7 +2152,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) | |||
2192 | seq_putc(seq, '\n'); | 2152 | seq_putc(seq, '\n'); |
2193 | seq_printf(seq, "\tPointers: %u\n", pointers); | 2153 | seq_printf(seq, "\tPointers: %u\n", pointers); |
2194 | 2154 | ||
2195 | bytes += sizeof(struct node *) * pointers; | 2155 | bytes += sizeof(struct rt_trie_node *) * pointers; |
2196 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); | 2156 | seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); |
2197 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); | 2157 | seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); |
2198 | } | 2158 | } |
@@ -2273,7 +2233,7 @@ static const struct file_operations fib_triestat_fops = { | |||
2273 | .release = single_release_net, | 2233 | .release = single_release_net, |
2274 | }; | 2234 | }; |
2275 | 2235 | ||
2276 | static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | 2236 | static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) |
2277 | { | 2237 | { |
2278 | struct fib_trie_iter *iter = seq->private; | 2238 | struct fib_trie_iter *iter = seq->private; |
2279 | struct net *net = seq_file_net(seq); | 2239 | struct net *net = seq_file_net(seq); |
@@ -2286,7 +2246,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) | |||
2286 | struct fib_table *tb; | 2246 | struct fib_table *tb; |
2287 | 2247 | ||
2288 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { | 2248 | hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { |
2289 | struct node *n; | 2249 | struct rt_trie_node *n; |
2290 | 2250 | ||
2291 | for (n = fib_trie_get_first(iter, | 2251 | for (n = fib_trie_get_first(iter, |
2292 | (struct trie *) tb->tb_data); | 2252 | (struct trie *) tb->tb_data); |
@@ -2315,7 +2275,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2315 | struct fib_table *tb = iter->tb; | 2275 | struct fib_table *tb = iter->tb; |
2316 | struct hlist_node *tb_node; | 2276 | struct hlist_node *tb_node; |
2317 | unsigned int h; | 2277 | unsigned int h; |
2318 | struct node *n; | 2278 | struct rt_trie_node *n; |
2319 | 2279 | ||
2320 | ++*pos; | 2280 | ++*pos; |
2321 | /* next node in same table */ | 2281 | /* next node in same table */ |
@@ -2325,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2325 | 2285 | ||
2326 | /* walk rest of this hash chain */ | 2286 | /* walk rest of this hash chain */ |
2327 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); | 2287 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2328 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { | 2288 | while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { |
2329 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | 2289 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); |
2330 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | 2290 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); |
2331 | if (n) | 2291 | if (n) |
@@ -2356,7 +2316,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v) | |||
2356 | 2316 | ||
2357 | static void seq_indent(struct seq_file *seq, int n) | 2317 | static void seq_indent(struct seq_file *seq, int n) |
2358 | { | 2318 | { |
2359 | while (n-- > 0) seq_puts(seq, " "); | 2319 | while (n-- > 0) |
2320 | seq_puts(seq, " "); | ||
2360 | } | 2321 | } |
2361 | 2322 | ||
2362 | static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) | 2323 | static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) |
@@ -2388,7 +2349,7 @@ static const char *const rtn_type_names[__RTN_MAX] = { | |||
2388 | [RTN_XRESOLVE] = "XRESOLVE", | 2349 | [RTN_XRESOLVE] = "XRESOLVE", |
2389 | }; | 2350 | }; |
2390 | 2351 | ||
2391 | static inline const char *rtn_type(char *buf, size_t len, unsigned t) | 2352 | static inline const char *rtn_type(char *buf, size_t len, unsigned int t) |
2392 | { | 2353 | { |
2393 | if (t < __RTN_MAX && rtn_type_names[t]) | 2354 | if (t < __RTN_MAX && rtn_type_names[t]) |
2394 | return rtn_type_names[t]; | 2355 | return rtn_type_names[t]; |
@@ -2400,7 +2361,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned t) | |||
2400 | static int fib_trie_seq_show(struct seq_file *seq, void *v) | 2361 | static int fib_trie_seq_show(struct seq_file *seq, void *v) |
2401 | { | 2362 | { |
2402 | const struct fib_trie_iter *iter = seq->private; | 2363 | const struct fib_trie_iter *iter = seq->private; |
2403 | struct node *n = v; | 2364 | struct rt_trie_node *n = v; |
2404 | 2365 | ||
2405 | if (!node_parent_rcu(n)) | 2366 | if (!node_parent_rcu(n)) |
2406 | fib_table_print(seq, iter->tb); | 2367 | fib_table_print(seq, iter->tb); |
@@ -2432,7 +2393,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2432 | seq_indent(seq, iter->depth+1); | 2393 | seq_indent(seq, iter->depth+1); |
2433 | seq_printf(seq, " /%d %s %s", li->plen, | 2394 | seq_printf(seq, " /%d %s %s", li->plen, |
2434 | rtn_scope(buf1, sizeof(buf1), | 2395 | rtn_scope(buf1, sizeof(buf1), |
2435 | fa->fa_scope), | 2396 | fa->fa_info->fib_scope), |
2436 | rtn_type(buf2, sizeof(buf2), | 2397 | rtn_type(buf2, sizeof(buf2), |
2437 | fa->fa_type)); | 2398 | fa->fa_type)); |
2438 | if (fa->fa_tos) | 2399 | if (fa->fa_tos) |
@@ -2544,13 +2505,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v) | |||
2544 | rcu_read_unlock(); | 2505 | rcu_read_unlock(); |
2545 | } | 2506 | } |
2546 | 2507 | ||
2547 | static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) | 2508 | static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) |
2548 | { | 2509 | { |
2549 | static unsigned type2flags[RTN_MAX + 1] = { | 2510 | unsigned int flags = 0; |
2550 | [7] = RTF_REJECT, [8] = RTF_REJECT, | ||
2551 | }; | ||
2552 | unsigned flags = type2flags[type]; | ||
2553 | 2511 | ||
2512 | if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) | ||
2513 | flags = RTF_REJECT; | ||
2554 | if (fi && fi->fib_nh->nh_gw) | 2514 | if (fi && fi->fib_nh->nh_gw) |
2555 | flags |= RTF_GATEWAY; | 2515 | flags |= RTF_GATEWAY; |
2556 | if (mask == htonl(0xFFFFFFFF)) | 2516 | if (mask == htonl(0xFFFFFFFF)) |
@@ -2562,7 +2522,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) | |||
2562 | /* | 2522 | /* |
2563 | * This outputs /proc/net/route. | 2523 | * This outputs /proc/net/route. |
2564 | * The format of the file is not supposed to be changed | 2524 | * The format of the file is not supposed to be changed |
2565 | * and needs to be same as fib_hash output to avoid breaking | 2525 | * and needs to be same as fib_hash output to avoid breaking |
2566 | * legacy utilities | 2526 | * legacy utilities |
2567 | */ | 2527 | */ |
2568 | static int fib_route_seq_show(struct seq_file *seq, void *v) | 2528 | static int fib_route_seq_show(struct seq_file *seq, void *v) |
@@ -2587,7 +2547,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) | |||
2587 | 2547 | ||
2588 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { | 2548 | list_for_each_entry_rcu(fa, &li->falh, fa_list) { |
2589 | const struct fib_info *fi = fa->fa_info; | 2549 | const struct fib_info *fi = fa->fa_info; |
2590 | unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); | 2550 | unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); |
2591 | int len; | 2551 | int len; |
2592 | 2552 | ||
2593 | if (fa->fa_type == RTN_BROADCAST | 2553 | if (fa->fa_type == RTN_BROADCAST |