summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@redhat.com>2014-12-31 13:56:12 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-31 18:25:54 -0500
commite9b44019d41a5cf41de43e895a5ed38b19a59b5b (patch)
tree7da1e2f5c10eca7ce43ed6a2acaa1e0b26047851 /net
parent836a0123c98c91df71678ce63c0d42770996582a (diff)
fib_trie: Update meaning of pos to represent unchecked bits
This change moves the pos value to the other side of the "bits" field. By doing this it actually simplifies a significant amount of code in the trie. For example when halving a tree we know that the bit lost exists at oldnode->pos, and if we inflate the tree the new bit being add is at tn->pos. Previously to find those bits you would have to subtract pos and bits from the keylength or start with a value of (1 << 31) and then shift that. There are a number of spots throughout the code that benefit from this. In the case of the hot-path searches the main advantage is that we can drop 2 or more operations from the search path as we no longer need to compute the value for the index to be shifted by and can instead just use the raw pos value. In addition the tkey_extract_bits is now defunct and can be replaced by get_index since the two operations were doing the same thing, but now get_index does it much more quickly as it is only an xor and shift versus a pair of shifts and a subtraction. Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/fib_trie.c194
1 files changed, 81 insertions, 113 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8a147c3baaba..9d675486b38c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -90,8 +90,7 @@ typedef unsigned int t_key;
90#define IS_TNODE(n) ((n)->bits) 90#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits) 91#define IS_LEAF(n) (!(n)->bits)
92 92
93#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits) 93#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
94#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
95 94
96struct tnode { 95struct tnode {
97 t_key key; 96 t_key key;
@@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned
209 return rcu_dereference_rtnl(tn->child[i]); 208 return rcu_dereference_rtnl(tn->child[i]);
210} 209}
211 210
212static inline t_key mask_pfx(t_key k, unsigned int l) 211/* To understand this stuff, an understanding of keys and all their bits is
213{ 212 * necessary. Every node in the trie has a key associated with it, but not
214 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); 213 * all of the bits in that key are significant.
215} 214 *
216 215 * Consider a node 'n' and its parent 'tp'.
217static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) 216 *
218{ 217 * If n is a leaf, every bit in its key is significant. Its presence is
219 if (offset < KEYLENGTH) 218 * necessitated by path compression, since during a tree traversal (when
220 return ((t_key)(a << offset)) >> (KEYLENGTH - bits); 219 * searching for a leaf - unless we are doing an insertion) we will completely
221 else 220 * ignore all skipped bits we encounter. Thus we need to verify, at the end of
222 return 0; 221 * a potentially successful search, that we have indeed been walking the
223} 222 * correct key path.
224 223 *
225/* 224 * Note that we can never "miss" the correct key in the tree if present by
226 To understand this stuff, an understanding of keys and all their bits is 225 * following the wrong path. Path compression ensures that segments of the key
227 necessary. Every node in the trie has a key associated with it, but not 226 * that are the same for all keys with a given prefix are skipped, but the
228 all of the bits in that key are significant. 227 * skipped part *is* identical for each node in the subtrie below the skipped
229 228 * bit! trie_insert() in this implementation takes care of that.
230 Consider a node 'n' and its parent 'tp'. 229 *
231 230 * if n is an internal node - a 'tnode' here, the various parts of its key
232 If n is a leaf, every bit in its key is significant. Its presence is 231 * have many different meanings.
233 necessitated by path compression, since during a tree traversal (when 232 *
234 searching for a leaf - unless we are doing an insertion) we will completely 233 * Example:
235 ignore all skipped bits we encounter. Thus we need to verify, at the end of 234 * _________________________________________________________________
236 a potentially successful search, that we have indeed been walking the 235 * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
237 correct key path. 236 * -----------------------------------------------------------------
238 237 * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
239 Note that we can never "miss" the correct key in the tree if present by 238 *
240 following the wrong path. Path compression ensures that segments of the key 239 * _________________________________________________________________
241 that are the same for all keys with a given prefix are skipped, but the 240 * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
242 skipped part *is* identical for each node in the subtrie below the skipped 241 * -----------------------------------------------------------------
243 bit! trie_insert() in this implementation takes care of that - note the 242 * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
244 call to tkey_sub_equals() in trie_insert(). 243 *
245 244 * tp->pos = 22
246 if n is an internal node - a 'tnode' here, the various parts of its key 245 * tp->bits = 3
247 have many different meanings. 246 * n->pos = 13
248 247 * n->bits = 4
249 Example: 248 *
250 _________________________________________________________________ 249 * First, let's just ignore the bits that come before the parent tp, that is
251 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C | 250 * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
252 ----------------------------------------------------------------- 251 * point we do not use them for anything.
253 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 252 *
254 253 * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
255 _________________________________________________________________ 254 * index into the parent's child array. That is, they will be used to find
256 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u | 255 * 'n' among tp's children.
257 ----------------------------------------------------------------- 256 *
258 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 257 * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
259 258 * for the node n.
260 tp->pos = 7 259 *
261 tp->bits = 3 260 * All the bits we have seen so far are significant to the node n. The rest
262 n->pos = 15 261 * of the bits are really not needed or indeed known in n->key.
263 n->bits = 4 262 *
264 263 * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
265 First, let's just ignore the bits that come before the parent tp, that is 264 * n's child array, and will of course be different for each child.
266 the bits from 0 to (tp->pos-1). They are *known* but at this point we do 265 *
267 not use them for anything. 266 * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
268 267 * at this point.
269 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the 268 */
270 index into the parent's child array. That is, they will be used to find
271 'n' among tp's children.
272
273 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
274 for the node n.
275
276 All the bits we have seen so far are significant to the node n. The rest
277 of the bits are really not needed or indeed known in n->key.
278
279 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
280 n's child array, and will of course be different for each child.
281
282
283 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
284 at this point.
285
286*/
287 269
288static const int halve_threshold = 25; 270static const int halve_threshold = 25;
289static const int inflate_threshold = 50; 271static const int inflate_threshold = 50;
@@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)
367 * as the nodes are searched 349 * as the nodes are searched
368 */ 350 */
369 l->key = key; 351 l->key = key;
370 l->pos = KEYLENGTH; 352 l->pos = 0;
371 /* set bits to 0 indicating we are not a tnode */ 353 /* set bits to 0 indicating we are not a tnode */
372 l->bits = 0; 354 l->bits = 0;
373 355
@@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
400 tn->parent = NULL; 382 tn->parent = NULL;
401 tn->pos = pos; 383 tn->pos = pos;
402 tn->bits = bits; 384 tn->bits = bits;
403 tn->key = mask_pfx(key, pos); 385 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
404 tn->full_children = 0; 386 tn->full_children = 0;
405 tn->empty_children = 1<<bits; 387 tn->empty_children = 1<<bits;
406 } 388 }
@@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
410 return tn; 392 return tn;
411} 393}
412 394
413/* 395/* Check whether a tnode 'n' is "full", i.e. it is an internal node
414 * Check whether a tnode 'n' is "full", i.e. it is an internal node
415 * and no bits are skipped. See discussion in dyntree paper p. 6 396 * and no bits are skipped. See discussion in dyntree paper p. 6
416 */ 397 */
417
418static inline int tnode_full(const struct tnode *tn, const struct tnode *n) 398static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
419{ 399{
420 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits)); 400 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
421} 401}
422 402
423static inline void put_child(struct tnode *tn, int i, 403static inline void put_child(struct tnode *tn, int i,
@@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
641{ 621{
642 int olen = tnode_child_length(oldtnode); 622 int olen = tnode_child_length(oldtnode);
643 struct tnode *tn; 623 struct tnode *tn;
624 t_key m;
644 int i; 625 int i;
645 626
646 pr_debug("In inflate\n"); 627 pr_debug("In inflate\n");
647 628
648 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 629 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
649 630
650 if (!tn) 631 if (!tn)
651 return ERR_PTR(-ENOMEM); 632 return ERR_PTR(-ENOMEM);
@@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
656 * fails. In case of failure we return the oldnode and inflate 637 * fails. In case of failure we return the oldnode and inflate
657 * of tnode is ignored. 638 * of tnode is ignored.
658 */ 639 */
640 for (i = 0, m = 1u << tn->pos; i < olen; i++) {
641 struct tnode *inode = tnode_get_child(oldtnode, i);
659 642
660 for (i = 0; i < olen; i++) { 643 if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
661 struct tnode *inode;
662
663 inode = tnode_get_child(oldtnode, i);
664 if (tnode_full(oldtnode, inode) && inode->bits > 1) {
665 struct tnode *left, *right; 644 struct tnode *left, *right;
666 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
667 645
668 left = tnode_new(inode->key&(~m), inode->pos + 1, 646 left = tnode_new(inode->key & ~m, inode->pos,
669 inode->bits - 1); 647 inode->bits - 1);
670 if (!left) 648 if (!left)
671 goto nomem; 649 goto nomem;
672 650
673 right = tnode_new(inode->key|m, inode->pos + 1, 651 right = tnode_new(inode->key | m, inode->pos,
674 inode->bits - 1); 652 inode->bits - 1);
675 653
676 if (!right) { 654 if (!right) {
@@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
694 672
695 /* A leaf or an internal node with skipped bits */ 673 /* A leaf or an internal node with skipped bits */
696 if (!tnode_full(oldtnode, inode)) { 674 if (!tnode_full(oldtnode, inode)) {
697 put_child(tn, 675 put_child(tn, get_index(inode->key, tn), inode);
698 tkey_extract_bits(inode->key, tn->pos, tn->bits),
699 inode);
700 continue; 676 continue;
701 } 677 }
702 678
@@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
767 743
768 pr_debug("In halve\n"); 744 pr_debug("In halve\n");
769 745
770 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 746 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
771 747
772 if (!tn) 748 if (!tn)
773 return ERR_PTR(-ENOMEM); 749 return ERR_PTR(-ENOMEM);
@@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
787 if (left && right) { 763 if (left && right) {
788 struct tnode *newn; 764 struct tnode *newn;
789 765
790 newn = tnode_new(left->key, tn->pos + tn->bits, 1); 766 newn = tnode_new(left->key, oldtnode->pos, 1);
791 767
792 if (!newn) 768 if (!newn)
793 goto nomem; 769 goto nomem;
@@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
915 key = tn->key; 891 key = tn->key;
916 892
917 while (tn != NULL && (tp = node_parent(tn)) != NULL) { 893 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
918 cindex = tkey_extract_bits(key, tp->pos, tp->bits); 894 cindex = get_index(key, tp);
919 wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); 895 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
920 tn = resize(t, tn); 896 tn = resize(t, tn);
921 897
@@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
1005 */ 981 */
1006 if (n) { 982 if (n) {
1007 struct tnode *tn; 983 struct tnode *tn;
1008 int newpos;
1009
1010 newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
1011 984
1012 tn = tnode_new(key, newpos, 1); 985 tn = tnode_new(key, __fls(key ^ n->key), 1);
1013 if (!tn) { 986 if (!tn) {
1014 free_leaf_info(li); 987 free_leaf_info(li);
1015 node_free(l); 988 node_free(l);
@@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode *l)
1559static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c) 1532static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
1560{ 1533{
1561 do { 1534 do {
1562 t_key idx; 1535 t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;
1563
1564 if (c)
1565 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
1566 else
1567 idx = 0;
1568 1536
1569 while (idx < 1u << p->bits) { 1537 while (idx < 1u << p->bits) {
1570 c = tnode_get_child_rcu(p, idx++); 1538 c = tnode_get_child_rcu(p, idx++);
@@ -1851,7 +1819,7 @@ rescan:
1851 /* Current node exhausted, pop back up */ 1819 /* Current node exhausted, pop back up */
1852 p = node_parent_rcu(tn); 1820 p = node_parent_rcu(tn);
1853 if (p) { 1821 if (p) {
1854 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; 1822 cindex = get_index(tn->key, p) + 1;
1855 tn = p; 1823 tn = p;
1856 --iter->depth; 1824 --iter->depth;
1857 goto rescan; 1825 goto rescan;
@@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
2186 if (IS_TNODE(n)) { 2154 if (IS_TNODE(n)) {
2187 __be32 prf = htonl(n->key); 2155 __be32 prf = htonl(n->key);
2188 2156
2189 seq_indent(seq, iter->depth - 1); 2157 seq_indent(seq, iter->depth-1);
2190 seq_printf(seq, " +-- %pI4/%d %d %d %d\n", 2158 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2191 &prf, n->pos, n->bits, n->full_children, 2159 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2192 n->empty_children); 2160 n->full_children, n->empty_children);
2193 } else { 2161 } else {
2194 struct leaf_info *li; 2162 struct leaf_info *li;
2195 __be32 val = htonl(n->key); 2163 __be32 val = htonl(n->key);