aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/ipv4/fib_trie.c192
1 files changed, 82 insertions, 110 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d3dbb4821ae8..2b7220728b24 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,24 +87,38 @@
87 87
88typedef unsigned int t_key; 88typedef unsigned int t_key;
89 89
90#define T_TNODE 0 90#define IS_TNODE(n) ((n)->bits)
91#define T_LEAF 1 91#define IS_LEAF(n) (!(n)->bits)
92#define NODE_TYPE_MASK 0x1UL
93#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
94 92
95#define IS_TNODE(n) (!(n->parent & T_LEAF)) 93struct tnode {
96#define IS_LEAF(n) (n->parent & T_LEAF) 94 t_key key;
95 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
96 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
97 struct tnode __rcu *parent;
98 union {
99 struct rcu_head rcu;
100 struct tnode *tnode_free;
101 };
102 unsigned int full_children; /* KEYLENGTH bits needed */
103 unsigned int empty_children; /* KEYLENGTH bits needed */
104 struct rt_trie_node __rcu *child[0];
105};
97 106
98struct rt_trie_node { 107struct rt_trie_node {
99 unsigned long parent;
100 t_key key; 108 t_key key;
109 unsigned char bits;
110 unsigned char pos;
111 struct tnode __rcu *parent;
112 struct rcu_head rcu;
101}; 113};
102 114
103struct leaf { 115struct leaf {
104 unsigned long parent;
105 t_key key; 116 t_key key;
106 struct hlist_head list; 117 unsigned char bits;
118 unsigned char pos;
119 struct tnode __rcu *parent;
107 struct rcu_head rcu; 120 struct rcu_head rcu;
121 struct hlist_head list;
108}; 122};
109 123
110struct leaf_info { 124struct leaf_info {
@@ -115,20 +129,6 @@ struct leaf_info {
115 struct rcu_head rcu; 129 struct rcu_head rcu;
116}; 130};
117 131
118struct tnode {
119 unsigned long parent;
120 t_key key;
121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
125 union {
126 struct rcu_head rcu;
127 struct tnode *tnode_free;
128 };
129 struct rt_trie_node __rcu *child[0];
130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats { 133struct trie_use_stats {
134 unsigned int gets; 134 unsigned int gets;
@@ -176,38 +176,27 @@ static const int sync_pages = 128;
176static struct kmem_cache *fn_alias_kmem __read_mostly; 176static struct kmem_cache *fn_alias_kmem __read_mostly;
177static struct kmem_cache *trie_leaf_kmem __read_mostly; 177static struct kmem_cache *trie_leaf_kmem __read_mostly;
178 178
179/* 179/* caller must hold RTNL */
180 * caller must hold RTNL 180#define node_parent(n) rtnl_dereference((n)->parent)
181 */
182static inline struct tnode *node_parent(const struct rt_trie_node *node)
183{
184 unsigned long parent;
185 181
186 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); 182/* caller must hold RCU read lock or RTNL */
183#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
187 184
188 return (struct tnode *)(parent & ~NODE_TYPE_MASK); 185/* wrapper for rcu_assign_pointer */
189} 186static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
190
191/*
192 * caller must hold RCU read lock or RTNL
193 */
194static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
195{ 187{
196 unsigned long parent; 188 if (node)
197 189 rcu_assign_pointer(node->parent, ptr);
198 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
199 lockdep_rtnl_is_held());
200
201 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
202} 190}
203 191
204/* Same as rcu_assign_pointer 192#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
205 * but that macro() assumes that value is a pointer. 193
194/* This provides us with the number of children in this node, in the case of a
195 * leaf this will return 0 meaning none of the children are accessible.
206 */ 196 */
207static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) 197static inline int tnode_child_length(const struct tnode *tn)
208{ 198{
209 smp_wmb(); 199 return (1ul << tn->bits) & ~(1ul);
210 node->parent = (unsigned long)ptr | NODE_TYPE(node);
211} 200}
212 201
213/* 202/*
@@ -215,7 +204,7 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
215 */ 204 */
216static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) 205static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
217{ 206{
218 BUG_ON(i >= 1U << tn->bits); 207 BUG_ON(i >= tnode_child_length(tn));
219 208
220 return rtnl_dereference(tn->child[i]); 209 return rtnl_dereference(tn->child[i]);
221} 210}
@@ -225,16 +214,11 @@ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsig
225 */ 214 */
226static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) 215static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
227{ 216{
228 BUG_ON(i >= 1U << tn->bits); 217 BUG_ON(i >= tnode_child_length(tn));
229 218
230 return rcu_dereference_rtnl(tn->child[i]); 219 return rcu_dereference_rtnl(tn->child[i]);
231} 220}
232 221
233static inline int tnode_child_length(const struct tnode *tn)
234{
235 return 1 << tn->bits;
236}
237
238static inline t_key mask_pfx(t_key k, unsigned int l) 222static inline t_key mask_pfx(t_key k, unsigned int l)
239{ 223{
240 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 224 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
@@ -336,11 +320,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
336 320
337*/ 321*/
338 322
339static inline void check_tnode(const struct tnode *tn)
340{
341 WARN_ON(tn && tn->pos+tn->bits > 32);
342}
343
344static const int halve_threshold = 25; 323static const int halve_threshold = 25;
345static const int inflate_threshold = 50; 324static const int inflate_threshold = 50;
346static const int halve_threshold_root = 15; 325static const int halve_threshold_root = 15;
@@ -426,11 +405,20 @@ static void tnode_free_flush(void)
426 } 405 }
427} 406}
428 407
429static struct leaf *leaf_new(void) 408static struct leaf *leaf_new(t_key key)
430{ 409{
431 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 410 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
432 if (l) { 411 if (l) {
433 l->parent = T_LEAF; 412 l->parent = NULL;
413 /* set key and pos to reflect full key value
414 * any trailing zeros in the key should be ignored
415 * as the nodes are searched
416 */
417 l->key = key;
418 l->pos = KEYLENGTH;
419 /* set bits to 0 indicating we are not a tnode */
420 l->bits = 0;
421
434 INIT_HLIST_HEAD(&l->list); 422 INIT_HLIST_HEAD(&l->list);
435 } 423 }
436 return l; 424 return l;
@@ -451,12 +439,16 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
451{ 439{
452 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); 440 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
453 struct tnode *tn = tnode_alloc(sz); 441 struct tnode *tn = tnode_alloc(sz);
442 unsigned int shift = pos + bits;
443
444 /* verify bits and pos their msb bits clear and values are valid */
445 BUG_ON(!bits || (shift > KEYLENGTH));
454 446
455 if (tn) { 447 if (tn) {
456 tn->parent = T_TNODE; 448 tn->parent = NULL;
457 tn->pos = pos; 449 tn->pos = pos;
458 tn->bits = bits; 450 tn->bits = bits;
459 tn->key = key; 451 tn->key = mask_pfx(key, pos);
460 tn->full_children = 0; 452 tn->full_children = 0;
461 tn->empty_children = 1<<bits; 453 tn->empty_children = 1<<bits;
462 } 454 }
@@ -473,10 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
473 465
474static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) 466static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
475{ 467{
476 if (n == NULL || IS_LEAF(n)) 468 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
477 return 0;
478
479 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
480} 469}
481 470
482static inline void put_child(struct tnode *tn, int i, 471static inline void put_child(struct tnode *tn, int i,
@@ -514,8 +503,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
514 else if (!wasfull && isfull) 503 else if (!wasfull && isfull)
515 tn->full_children++; 504 tn->full_children++;
516 505
517 if (n) 506 node_set_parent(n, tn);
518 node_set_parent(n, tn);
519 507
520 rcu_assign_pointer(tn->child[i], n); 508 rcu_assign_pointer(tn->child[i], n);
521} 509}
@@ -523,7 +511,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
523#define MAX_WORK 10 511#define MAX_WORK 10
524static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) 512static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
525{ 513{
526 int i; 514 struct rt_trie_node *n = NULL;
527 struct tnode *old_tn; 515 struct tnode *old_tn;
528 int inflate_threshold_use; 516 int inflate_threshold_use;
529 int halve_threshold_use; 517 int halve_threshold_use;
@@ -536,12 +524,11 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
536 tn, inflate_threshold, halve_threshold); 524 tn, inflate_threshold, halve_threshold);
537 525
538 /* No children */ 526 /* No children */
539 if (tn->empty_children == tnode_child_length(tn)) { 527 if (tn->empty_children > (tnode_child_length(tn) - 1))
540 tnode_free_safe(tn); 528 goto no_children;
541 return NULL; 529
542 }
543 /* One child */ 530 /* One child */
544 if (tn->empty_children == tnode_child_length(tn) - 1) 531 if (tn->empty_children == (tnode_child_length(tn) - 1))
545 goto one_child; 532 goto one_child;
546 /* 533 /*
547 * Double as long as the resulting node has a number of 534 * Double as long as the resulting node has a number of
@@ -607,11 +594,9 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
607 * 594 *
608 */ 595 */
609 596
610 check_tnode(tn);
611
612 /* Keep root node larger */ 597 /* Keep root node larger */
613 598
614 if (!node_parent((struct rt_trie_node *)tn)) { 599 if (!node_parent(tn)) {
615 inflate_threshold_use = inflate_threshold_root; 600 inflate_threshold_use = inflate_threshold_root;
616 halve_threshold_use = halve_threshold_root; 601 halve_threshold_use = halve_threshold_root;
617 } else { 602 } else {
@@ -637,8 +622,6 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
637 } 622 }
638 } 623 }
639 624
640 check_tnode(tn);
641
642 /* Return if at least one inflate is run */ 625 /* Return if at least one inflate is run */
643 if (max_work != MAX_WORK) 626 if (max_work != MAX_WORK)
644 return (struct rt_trie_node *) tn; 627 return (struct rt_trie_node *) tn;
@@ -666,21 +649,16 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
666 649
667 650
668 /* Only one child remains */ 651 /* Only one child remains */
669 if (tn->empty_children == tnode_child_length(tn) - 1) { 652 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
653 unsigned long i;
670one_child: 654one_child:
671 for (i = 0; i < tnode_child_length(tn); i++) { 655 for (i = tnode_child_length(tn); !n && i;)
672 struct rt_trie_node *n; 656 n = tnode_get_child(tn, --i);
673 657no_children:
674 n = rtnl_dereference(tn->child[i]); 658 /* compress one level */
675 if (!n) 659 node_set_parent(n, NULL);
676 continue; 660 tnode_free_safe(tn);
677 661 return n;
678 /* compress one level */
679
680 node_set_parent(n, NULL);
681 tnode_free_safe(tn);
682 return n;
683 }
684 } 662 }
685 return (struct rt_trie_node *) tn; 663 return (struct rt_trie_node *) tn;
686} 664}
@@ -760,8 +738,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
760 738
761 /* A leaf or an internal node with skipped bits */ 739 /* A leaf or an internal node with skipped bits */
762 740
763 if (IS_LEAF(node) || ((struct tnode *) node)->pos > 741 if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) {
764 tn->pos + tn->bits - 1) {
765 put_child(tn, 742 put_child(tn,
766 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), 743 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1),
767 node); 744 node);
@@ -958,11 +935,9 @@ fib_find_node(struct trie *t, u32 key)
958 pos = 0; 935 pos = 0;
959 n = rcu_dereference_rtnl(t->trie); 936 n = rcu_dereference_rtnl(t->trie);
960 937
961 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 938 while (n && IS_TNODE(n)) {
962 tn = (struct tnode *) n; 939 tn = (struct tnode *) n;
963 940
964 check_tnode(tn);
965
966 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 941 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
967 pos = tn->pos + tn->bits; 942 pos = tn->pos + tn->bits;
968 n = tnode_get_child_rcu(tn, 943 n = tnode_get_child_rcu(tn,
@@ -988,7 +963,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
988 963
989 key = tn->key; 964 key = tn->key;
990 965
991 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { 966 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
992 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 967 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
993 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 968 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
994 tn = (struct tnode *)resize(t, tn); 969 tn = (struct tnode *)resize(t, tn);
@@ -996,7 +971,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
996 tnode_put_child_reorg(tp, cindex, 971 tnode_put_child_reorg(tp, cindex,
997 (struct rt_trie_node *)tn, wasfull); 972 (struct rt_trie_node *)tn, wasfull);
998 973
999 tp = node_parent((struct rt_trie_node *) tn); 974 tp = node_parent(tn);
1000 if (!tp) 975 if (!tp)
1001 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 976 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1002 977
@@ -1048,11 +1023,9 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1048 * If it doesn't, we need to replace it with a T_TNODE. 1023 * If it doesn't, we need to replace it with a T_TNODE.
1049 */ 1024 */
1050 1025
1051 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 1026 while (n && IS_TNODE(n)) {
1052 tn = (struct tnode *) n; 1027 tn = (struct tnode *) n;
1053 1028
1054 check_tnode(tn);
1055
1056 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 1029 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1057 tp = tn; 1030 tp = tn;
1058 pos = tn->pos + tn->bits; 1031 pos = tn->pos + tn->bits;
@@ -1087,12 +1060,11 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1087 insert_leaf_info(&l->list, li); 1060 insert_leaf_info(&l->list, li);
1088 goto done; 1061 goto done;
1089 } 1062 }
1090 l = leaf_new(); 1063 l = leaf_new(key);
1091 1064
1092 if (!l) 1065 if (!l)
1093 return NULL; 1066 return NULL;
1094 1067
1095 l->key = key;
1096 li = leaf_info_new(plen); 1068 li = leaf_info_new(plen);
1097 1069
1098 if (!li) { 1070 if (!li) {
@@ -1569,7 +1541,7 @@ backtrace:
1569 if (chopped_off <= pn->bits) { 1541 if (chopped_off <= pn->bits) {
1570 cindex &= ~(1 << (chopped_off-1)); 1542 cindex &= ~(1 << (chopped_off-1));
1571 } else { 1543 } else {
1572 struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); 1544 struct tnode *parent = node_parent_rcu(pn);
1573 if (!parent) 1545 if (!parent)
1574 goto failed; 1546 goto failed;
1575 1547
@@ -1597,7 +1569,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup);
1597 */ 1569 */
1598static void trie_leaf_remove(struct trie *t, struct leaf *l) 1570static void trie_leaf_remove(struct trie *t, struct leaf *l)
1599{ 1571{
1600 struct tnode *tp = node_parent((struct rt_trie_node *) l); 1572 struct tnode *tp = node_parent(l);
1601 1573
1602 pr_debug("entering trie_leaf_remove(%p)\n", l); 1574 pr_debug("entering trie_leaf_remove(%p)\n", l);
1603 1575
@@ -2374,7 +2346,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2374 2346
2375 if (IS_TNODE(n)) { 2347 if (IS_TNODE(n)) {
2376 struct tnode *tn = (struct tnode *) n; 2348 struct tnode *tn = (struct tnode *) n;
2377 __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2349 __be32 prf = htonl(tn->key);
2378 2350
2379 seq_indent(seq, iter->depth-1); 2351 seq_indent(seq, iter->depth-1);
2380 seq_printf(seq, " +-- %pI4/%d %d %d %d\n", 2352 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",