aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@redhat.com>2014-12-31 13:55:35 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-31 18:25:54 -0500
commit64c9b6fb26ebcc82e36438d4084f2258f29dbadf (patch)
treebaa87b4d1e3cbb74096830275d4b46828212d7a4 /net/ipv4
parent8274a97aa4c694ad0d7b31b283a89dcca140e62b (diff)
fib_trie: Make leaf and tnode more uniform
This change makes some fundamental changes to the way leaves and tnodes are constructed. The big differences are: 1. Leaves now populate pos and bits indicating their full key size. 2. Trie nodes now mask out their lower bits to be consistent with the leaf 3. Both structures have been reordered so that rt_trie_node now consisists of a much larger region including the pos, bits, and rcu portions of the tnode structure. On 32b systems this will result in the leaf being 4B larger as the pos and bits values were added to a hole created by the key as it was only 4B in length. Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_trie.c192
1 files changed, 82 insertions, 110 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d3dbb4821ae8..2b7220728b24 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,24 +87,38 @@
87 87
88typedef unsigned int t_key; 88typedef unsigned int t_key;
89 89
90#define T_TNODE 0 90#define IS_TNODE(n) ((n)->bits)
91#define T_LEAF 1 91#define IS_LEAF(n) (!(n)->bits)
92#define NODE_TYPE_MASK 0x1UL
93#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
94 92
95#define IS_TNODE(n) (!(n->parent & T_LEAF)) 93struct tnode {
96#define IS_LEAF(n) (n->parent & T_LEAF) 94 t_key key;
95 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
96 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
97 struct tnode __rcu *parent;
98 union {
99 struct rcu_head rcu;
100 struct tnode *tnode_free;
101 };
102 unsigned int full_children; /* KEYLENGTH bits needed */
103 unsigned int empty_children; /* KEYLENGTH bits needed */
104 struct rt_trie_node __rcu *child[0];
105};
97 106
98struct rt_trie_node { 107struct rt_trie_node {
99 unsigned long parent;
100 t_key key; 108 t_key key;
109 unsigned char bits;
110 unsigned char pos;
111 struct tnode __rcu *parent;
112 struct rcu_head rcu;
101}; 113};
102 114
103struct leaf { 115struct leaf {
104 unsigned long parent;
105 t_key key; 116 t_key key;
106 struct hlist_head list; 117 unsigned char bits;
118 unsigned char pos;
119 struct tnode __rcu *parent;
107 struct rcu_head rcu; 120 struct rcu_head rcu;
121 struct hlist_head list;
108}; 122};
109 123
110struct leaf_info { 124struct leaf_info {
@@ -115,20 +129,6 @@ struct leaf_info {
115 struct rcu_head rcu; 129 struct rcu_head rcu;
116}; 130};
117 131
118struct tnode {
119 unsigned long parent;
120 t_key key;
121 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
122 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
123 unsigned int full_children; /* KEYLENGTH bits needed */
124 unsigned int empty_children; /* KEYLENGTH bits needed */
125 union {
126 struct rcu_head rcu;
127 struct tnode *tnode_free;
128 };
129 struct rt_trie_node __rcu *child[0];
130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS 132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats { 133struct trie_use_stats {
134 unsigned int gets; 134 unsigned int gets;
@@ -176,38 +176,27 @@ static const int sync_pages = 128;
176static struct kmem_cache *fn_alias_kmem __read_mostly; 176static struct kmem_cache *fn_alias_kmem __read_mostly;
177static struct kmem_cache *trie_leaf_kmem __read_mostly; 177static struct kmem_cache *trie_leaf_kmem __read_mostly;
178 178
179/* 179/* caller must hold RTNL */
180 * caller must hold RTNL 180#define node_parent(n) rtnl_dereference((n)->parent)
181 */
182static inline struct tnode *node_parent(const struct rt_trie_node *node)
183{
184 unsigned long parent;
185 181
186 parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); 182/* caller must hold RCU read lock or RTNL */
183#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
187 184
188 return (struct tnode *)(parent & ~NODE_TYPE_MASK); 185/* wrapper for rcu_assign_pointer */
189} 186static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
190
191/*
192 * caller must hold RCU read lock or RTNL
193 */
194static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
195{ 187{
196 unsigned long parent; 188 if (node)
197 189 rcu_assign_pointer(node->parent, ptr);
198 parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
199 lockdep_rtnl_is_held());
200
201 return (struct tnode *)(parent & ~NODE_TYPE_MASK);
202} 190}
203 191
204/* Same as rcu_assign_pointer 192#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
205 * but that macro() assumes that value is a pointer. 193
194/* This provides us with the number of children in this node, in the case of a
195 * leaf this will return 0 meaning none of the children are accessible.
206 */ 196 */
207static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) 197static inline int tnode_child_length(const struct tnode *tn)
208{ 198{
209 smp_wmb(); 199 return (1ul << tn->bits) & ~(1ul);
210 node->parent = (unsigned long)ptr | NODE_TYPE(node);
211} 200}
212 201
213/* 202/*
@@ -215,7 +204,7 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
215 */ 204 */
216static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) 205static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
217{ 206{
218 BUG_ON(i >= 1U << tn->bits); 207 BUG_ON(i >= tnode_child_length(tn));
219 208
220 return rtnl_dereference(tn->child[i]); 209 return rtnl_dereference(tn->child[i]);
221} 210}
@@ -225,16 +214,11 @@ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsig
225 */ 214 */
226static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) 215static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
227{ 216{
228 BUG_ON(i >= 1U << tn->bits); 217 BUG_ON(i >= tnode_child_length(tn));
229 218
230 return rcu_dereference_rtnl(tn->child[i]); 219 return rcu_dereference_rtnl(tn->child[i]);
231} 220}
232 221
233static inline int tnode_child_length(const struct tnode *tn)
234{
235 return 1 << tn->bits;
236}
237
238static inline t_key mask_pfx(t_key k, unsigned int l) 222static inline t_key mask_pfx(t_key k, unsigned int l)
239{ 223{
240 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 224 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
@@ -336,11 +320,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
336 320
337*/ 321*/
338 322
339static inline void check_tnode(const struct tnode *tn)
340{
341 WARN_ON(tn && tn->pos+tn->bits > 32);
342}
343
344static const int halve_threshold = 25; 323static const int halve_threshold = 25;
345static const int inflate_threshold = 50; 324static const int inflate_threshold = 50;
346static const int halve_threshold_root = 15; 325static const int halve_threshold_root = 15;
@@ -426,11 +405,20 @@ static void tnode_free_flush(void)
426 } 405 }
427} 406}
428 407
429static struct leaf *leaf_new(void) 408static struct leaf *leaf_new(t_key key)
430{ 409{
431 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); 410 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
432 if (l) { 411 if (l) {
433 l->parent = T_LEAF; 412 l->parent = NULL;
413 /* set key and pos to reflect full key value
414 * any trailing zeros in the key should be ignored
415 * as the nodes are searched
416 */
417 l->key = key;
418 l->pos = KEYLENGTH;
419 /* set bits to 0 indicating we are not a tnode */
420 l->bits = 0;
421
434 INIT_HLIST_HEAD(&l->list); 422 INIT_HLIST_HEAD(&l->list);
435 } 423 }
436 return l; 424 return l;
@@ -451,12 +439,16 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
451{ 439{
452 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); 440 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
453 struct tnode *tn = tnode_alloc(sz); 441 struct tnode *tn = tnode_alloc(sz);
442 unsigned int shift = pos + bits;
443
444 /* verify bits and pos their msb bits clear and values are valid */
445 BUG_ON(!bits || (shift > KEYLENGTH));
454 446
455 if (tn) { 447 if (tn) {
456 tn->parent = T_TNODE; 448 tn->parent = NULL;
457 tn->pos = pos; 449 tn->pos = pos;
458 tn->bits = bits; 450 tn->bits = bits;
459 tn->key = key; 451 tn->key = mask_pfx(key, pos);
460 tn->full_children = 0; 452 tn->full_children = 0;
461 tn->empty_children = 1<<bits; 453 tn->empty_children = 1<<bits;
462 } 454 }
@@ -473,10 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
473 465
474static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) 466static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
475{ 467{
476 if (n == NULL || IS_LEAF(n)) 468 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
477 return 0;
478
479 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
480} 469}
481 470
482static inline void put_child(struct tnode *tn, int i, 471static inline void put_child(struct tnode *tn, int i,
@@ -514,8 +503,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
514 else if (!wasfull && isfull) 503 else if (!wasfull && isfull)
515 tn->full_children++; 504 tn->full_children++;
516 505
517 if (n) 506 node_set_parent(n, tn);
518 node_set_parent(n, tn);
519 507
520 rcu_assign_pointer(tn->child[i], n); 508 rcu_assign_pointer(tn->child[i], n);
521} 509}
@@ -523,7 +511,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *
523#define MAX_WORK 10 511#define MAX_WORK 10
524static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) 512static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
525{ 513{
526 int i; 514 struct rt_trie_node *n = NULL;
527 struct tnode *old_tn; 515 struct tnode *old_tn;
528 int inflate_threshold_use; 516 int inflate_threshold_use;
529 int halve_threshold_use; 517 int halve_threshold_use;
@@ -536,12 +524,11 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
536 tn, inflate_threshold, halve_threshold); 524 tn, inflate_threshold, halve_threshold);
537 525
538 /* No children */ 526 /* No children */
539 if (tn->empty_children == tnode_child_length(tn)) { 527 if (tn->empty_children > (tnode_child_length(tn) - 1))
540 tnode_free_safe(tn); 528 goto no_children;
541 return NULL; 529
542 }
543 /* One child */ 530 /* One child */
544 if (tn->empty_children == tnode_child_length(tn) - 1) 531 if (tn->empty_children == (tnode_child_length(tn) - 1))
545 goto one_child; 532 goto one_child;
546 /* 533 /*
547 * Double as long as the resulting node has a number of 534 * Double as long as the resulting node has a number of
@@ -607,11 +594,9 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
607 * 594 *
608 */ 595 */
609 596
610 check_tnode(tn);
611
612 /* Keep root node larger */ 597 /* Keep root node larger */
613 598
614 if (!node_parent((struct rt_trie_node *)tn)) { 599 if (!node_parent(tn)) {
615 inflate_threshold_use = inflate_threshold_root; 600 inflate_threshold_use = inflate_threshold_root;
616 halve_threshold_use = halve_threshold_root; 601 halve_threshold_use = halve_threshold_root;
617 } else { 602 } else {
@@ -637,8 +622,6 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
637 } 622 }
638 } 623 }
639 624
640 check_tnode(tn);
641
642 /* Return if at least one inflate is run */ 625 /* Return if at least one inflate is run */
643 if (max_work != MAX_WORK) 626 if (max_work != MAX_WORK)
644 return (struct rt_trie_node *) tn; 627 return (struct rt_trie_node *) tn;
@@ -666,21 +649,16 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
666 649
667 650
668 /* Only one child remains */ 651 /* Only one child remains */
669 if (tn->empty_children == tnode_child_length(tn) - 1) { 652 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
653 unsigned long i;
670one_child: 654one_child:
671 for (i = 0; i < tnode_child_length(tn); i++) { 655 for (i = tnode_child_length(tn); !n && i;)
672 struct rt_trie_node *n; 656 n = tnode_get_child(tn, --i);
673 657no_children:
674 n = rtnl_dereference(tn->child[i]); 658 /* compress one level */
675 if (!n) 659 node_set_parent(n, NULL);
676 continue; 660 tnode_free_safe(tn);
677 661 return n;
678 /* compress one level */
679
680 node_set_parent(n, NULL);
681 tnode_free_safe(tn);
682 return n;
683 }
684 } 662 }
685 return (struct rt_trie_node *) tn; 663 return (struct rt_trie_node *) tn;
686} 664}
@@ -760,8 +738,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
760 738
761 /* A leaf or an internal node with skipped bits */ 739 /* A leaf or an internal node with skipped bits */
762 740
763 if (IS_LEAF(node) || ((struct tnode *) node)->pos > 741 if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) {
764 tn->pos + tn->bits - 1) {
765 put_child(tn, 742 put_child(tn,
766 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), 743 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1),
767 node); 744 node);
@@ -958,11 +935,9 @@ fib_find_node(struct trie *t, u32 key)
958 pos = 0; 935 pos = 0;
959 n = rcu_dereference_rtnl(t->trie); 936 n = rcu_dereference_rtnl(t->trie);
960 937
961 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 938 while (n && IS_TNODE(n)) {
962 tn = (struct tnode *) n; 939 tn = (struct tnode *) n;
963 940
964 check_tnode(tn);
965
966 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 941 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
967 pos = tn->pos + tn->bits; 942 pos = tn->pos + tn->bits;
968 n = tnode_get_child_rcu(tn, 943 n = tnode_get_child_rcu(tn,
@@ -988,7 +963,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
988 963
989 key = tn->key; 964 key = tn->key;
990 965
991 while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { 966 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
992 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 967 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
993 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 968 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
994 tn = (struct tnode *)resize(t, tn); 969 tn = (struct tnode *)resize(t, tn);
@@ -996,7 +971,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
996 tnode_put_child_reorg(tp, cindex, 971 tnode_put_child_reorg(tp, cindex,
997 (struct rt_trie_node *)tn, wasfull); 972 (struct rt_trie_node *)tn, wasfull);
998 973
999 tp = node_parent((struct rt_trie_node *) tn); 974 tp = node_parent(tn);
1000 if (!tp) 975 if (!tp)
1001 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); 976 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
1002 977
@@ -1048,11 +1023,9 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1048 * If it doesn't, we need to replace it with a T_TNODE. 1023 * If it doesn't, we need to replace it with a T_TNODE.
1049 */ 1024 */
1050 1025
1051 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 1026 while (n && IS_TNODE(n)) {
1052 tn = (struct tnode *) n; 1027 tn = (struct tnode *) n;
1053 1028
1054 check_tnode(tn);
1055
1056 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { 1029 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1057 tp = tn; 1030 tp = tn;
1058 pos = tn->pos + tn->bits; 1031 pos = tn->pos + tn->bits;
@@ -1087,12 +1060,11 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1087 insert_leaf_info(&l->list, li); 1060 insert_leaf_info(&l->list, li);
1088 goto done; 1061 goto done;
1089 } 1062 }
1090 l = leaf_new(); 1063 l = leaf_new(key);
1091 1064
1092 if (!l) 1065 if (!l)
1093 return NULL; 1066 return NULL;
1094 1067
1095 l->key = key;
1096 li = leaf_info_new(plen); 1068 li = leaf_info_new(plen);
1097 1069
1098 if (!li) { 1070 if (!li) {
@@ -1569,7 +1541,7 @@ backtrace:
1569 if (chopped_off <= pn->bits) { 1541 if (chopped_off <= pn->bits) {
1570 cindex &= ~(1 << (chopped_off-1)); 1542 cindex &= ~(1 << (chopped_off-1));
1571 } else { 1543 } else {
1572 struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); 1544 struct tnode *parent = node_parent_rcu(pn);
1573 if (!parent) 1545 if (!parent)
1574 goto failed; 1546 goto failed;
1575 1547
@@ -1597,7 +1569,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup);
1597 */ 1569 */
1598static void trie_leaf_remove(struct trie *t, struct leaf *l) 1570static void trie_leaf_remove(struct trie *t, struct leaf *l)
1599{ 1571{
1600 struct tnode *tp = node_parent((struct rt_trie_node *) l); 1572 struct tnode *tp = node_parent(l);
1601 1573
1602 pr_debug("entering trie_leaf_remove(%p)\n", l); 1574 pr_debug("entering trie_leaf_remove(%p)\n", l);
1603 1575
@@ -2374,7 +2346,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2374 2346
2375 if (IS_TNODE(n)) { 2347 if (IS_TNODE(n)) {
2376 struct tnode *tn = (struct tnode *) n; 2348 struct tnode *tn = (struct tnode *) n;
2377 __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); 2349 __be32 prf = htonl(tn->key);
2378 2350
2379 seq_indent(seq, iter->depth-1); 2351 seq_indent(seq, iter->depth-1);
2380 seq_printf(seq, " +-- %pI4/%d %d %d %d\n", 2352 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",