diff options
-rw-r--r-- | net/ipv4/fib_trie.c | 192 |
1 files changed, 82 insertions, 110 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d3dbb4821ae8..2b7220728b24 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -87,24 +87,38 @@ | |||
87 | 87 | ||
88 | typedef unsigned int t_key; | 88 | typedef unsigned int t_key; |
89 | 89 | ||
90 | #define T_TNODE 0 | 90 | #define IS_TNODE(n) ((n)->bits) |
91 | #define T_LEAF 1 | 91 | #define IS_LEAF(n) (!(n)->bits) |
92 | #define NODE_TYPE_MASK 0x1UL | ||
93 | #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) | ||
94 | 92 | ||
95 | #define IS_TNODE(n) (!(n->parent & T_LEAF)) | 93 | struct tnode { |
96 | #define IS_LEAF(n) (n->parent & T_LEAF) | 94 | t_key key; |
95 | unsigned char bits; /* 2log(KEYLENGTH) bits needed */ | ||
96 | unsigned char pos; /* 2log(KEYLENGTH) bits needed */ | ||
97 | struct tnode __rcu *parent; | ||
98 | union { | ||
99 | struct rcu_head rcu; | ||
100 | struct tnode *tnode_free; | ||
101 | }; | ||
102 | unsigned int full_children; /* KEYLENGTH bits needed */ | ||
103 | unsigned int empty_children; /* KEYLENGTH bits needed */ | ||
104 | struct rt_trie_node __rcu *child[0]; | ||
105 | }; | ||
97 | 106 | ||
98 | struct rt_trie_node { | 107 | struct rt_trie_node { |
99 | unsigned long parent; | ||
100 | t_key key; | 108 | t_key key; |
109 | unsigned char bits; | ||
110 | unsigned char pos; | ||
111 | struct tnode __rcu *parent; | ||
112 | struct rcu_head rcu; | ||
101 | }; | 113 | }; |
102 | 114 | ||
103 | struct leaf { | 115 | struct leaf { |
104 | unsigned long parent; | ||
105 | t_key key; | 116 | t_key key; |
106 | struct hlist_head list; | 117 | unsigned char bits; |
118 | unsigned char pos; | ||
119 | struct tnode __rcu *parent; | ||
107 | struct rcu_head rcu; | 120 | struct rcu_head rcu; |
121 | struct hlist_head list; | ||
108 | }; | 122 | }; |
109 | 123 | ||
110 | struct leaf_info { | 124 | struct leaf_info { |
@@ -115,20 +129,6 @@ struct leaf_info { | |||
115 | struct rcu_head rcu; | 129 | struct rcu_head rcu; |
116 | }; | 130 | }; |
117 | 131 | ||
118 | struct tnode { | ||
119 | unsigned long parent; | ||
120 | t_key key; | ||
121 | unsigned char pos; /* 2log(KEYLENGTH) bits needed */ | ||
122 | unsigned char bits; /* 2log(KEYLENGTH) bits needed */ | ||
123 | unsigned int full_children; /* KEYLENGTH bits needed */ | ||
124 | unsigned int empty_children; /* KEYLENGTH bits needed */ | ||
125 | union { | ||
126 | struct rcu_head rcu; | ||
127 | struct tnode *tnode_free; | ||
128 | }; | ||
129 | struct rt_trie_node __rcu *child[0]; | ||
130 | }; | ||
131 | |||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
133 | struct trie_use_stats { | 133 | struct trie_use_stats { |
134 | unsigned int gets; | 134 | unsigned int gets; |
@@ -176,38 +176,27 @@ static const int sync_pages = 128; | |||
176 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 176 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
177 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 177 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
178 | 178 | ||
179 | /* | 179 | /* caller must hold RTNL */ |
180 | * caller must hold RTNL | 180 | #define node_parent(n) rtnl_dereference((n)->parent) |
181 | */ | ||
182 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
183 | { | ||
184 | unsigned long parent; | ||
185 | 181 | ||
186 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | 182 | /* caller must hold RCU read lock or RTNL */ |
183 | #define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent) | ||
187 | 184 | ||
188 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | 185 | /* wrapper for rcu_assign_pointer */ |
189 | } | 186 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) |
190 | |||
191 | /* | ||
192 | * caller must hold RCU read lock or RTNL | ||
193 | */ | ||
194 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
195 | { | 187 | { |
196 | unsigned long parent; | 188 | if (node) |
197 | 189 | rcu_assign_pointer(node->parent, ptr); | |
198 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
199 | lockdep_rtnl_is_held()); | ||
200 | |||
201 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
202 | } | 190 | } |
203 | 191 | ||
204 | /* Same as rcu_assign_pointer | 192 | #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p) |
205 | * but that macro() assumes that value is a pointer. | 193 | |
194 | /* This provides us with the number of children in this node, in the case of a | ||
195 | * leaf this will return 0 meaning none of the children are accessible. | ||
206 | */ | 196 | */ |
207 | static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | 197 | static inline int tnode_child_length(const struct tnode *tn) |
208 | { | 198 | { |
209 | smp_wmb(); | 199 | return (1ul << tn->bits) & ~(1ul); |
210 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | ||
211 | } | 200 | } |
212 | 201 | ||
213 | /* | 202 | /* |
@@ -215,7 +204,7 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | |||
215 | */ | 204 | */ |
216 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | 205 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) |
217 | { | 206 | { |
218 | BUG_ON(i >= 1U << tn->bits); | 207 | BUG_ON(i >= tnode_child_length(tn)); |
219 | 208 | ||
220 | return rtnl_dereference(tn->child[i]); | 209 | return rtnl_dereference(tn->child[i]); |
221 | } | 210 | } |
@@ -225,16 +214,11 @@ static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsig | |||
225 | */ | 214 | */ |
226 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | 215 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) |
227 | { | 216 | { |
228 | BUG_ON(i >= 1U << tn->bits); | 217 | BUG_ON(i >= tnode_child_length(tn)); |
229 | 218 | ||
230 | return rcu_dereference_rtnl(tn->child[i]); | 219 | return rcu_dereference_rtnl(tn->child[i]); |
231 | } | 220 | } |
232 | 221 | ||
233 | static inline int tnode_child_length(const struct tnode *tn) | ||
234 | { | ||
235 | return 1 << tn->bits; | ||
236 | } | ||
237 | |||
238 | static inline t_key mask_pfx(t_key k, unsigned int l) | 222 | static inline t_key mask_pfx(t_key k, unsigned int l) |
239 | { | 223 | { |
240 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); | 224 | return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); |
@@ -336,11 +320,6 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) | |||
336 | 320 | ||
337 | */ | 321 | */ |
338 | 322 | ||
339 | static inline void check_tnode(const struct tnode *tn) | ||
340 | { | ||
341 | WARN_ON(tn && tn->pos+tn->bits > 32); | ||
342 | } | ||
343 | |||
344 | static const int halve_threshold = 25; | 323 | static const int halve_threshold = 25; |
345 | static const int inflate_threshold = 50; | 324 | static const int inflate_threshold = 50; |
346 | static const int halve_threshold_root = 15; | 325 | static const int halve_threshold_root = 15; |
@@ -426,11 +405,20 @@ static void tnode_free_flush(void) | |||
426 | } | 405 | } |
427 | } | 406 | } |
428 | 407 | ||
429 | static struct leaf *leaf_new(void) | 408 | static struct leaf *leaf_new(t_key key) |
430 | { | 409 | { |
431 | struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); | 410 | struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL); |
432 | if (l) { | 411 | if (l) { |
433 | l->parent = T_LEAF; | 412 | l->parent = NULL; |
413 | /* set key and pos to reflect full key value | ||
414 | * any trailing zeros in the key should be ignored | ||
415 | * as the nodes are searched | ||
416 | */ | ||
417 | l->key = key; | ||
418 | l->pos = KEYLENGTH; | ||
419 | /* set bits to 0 indicating we are not a tnode */ | ||
420 | l->bits = 0; | ||
421 | |||
434 | INIT_HLIST_HEAD(&l->list); | 422 | INIT_HLIST_HEAD(&l->list); |
435 | } | 423 | } |
436 | return l; | 424 | return l; |
@@ -451,12 +439,16 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
451 | { | 439 | { |
452 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); | 440 | size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); |
453 | struct tnode *tn = tnode_alloc(sz); | 441 | struct tnode *tn = tnode_alloc(sz); |
442 | unsigned int shift = pos + bits; | ||
443 | |||
444 | /* verify bits and pos their msb bits clear and values are valid */ | ||
445 | BUG_ON(!bits || (shift > KEYLENGTH)); | ||
454 | 446 | ||
455 | if (tn) { | 447 | if (tn) { |
456 | tn->parent = T_TNODE; | 448 | tn->parent = NULL; |
457 | tn->pos = pos; | 449 | tn->pos = pos; |
458 | tn->bits = bits; | 450 | tn->bits = bits; |
459 | tn->key = key; | 451 | tn->key = mask_pfx(key, pos); |
460 | tn->full_children = 0; | 452 | tn->full_children = 0; |
461 | tn->empty_children = 1<<bits; | 453 | tn->empty_children = 1<<bits; |
462 | } | 454 | } |
@@ -473,10 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
473 | 465 | ||
474 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) | 466 | static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) |
475 | { | 467 | { |
476 | if (n == NULL || IS_LEAF(n)) | 468 | return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits)); |
477 | return 0; | ||
478 | |||
479 | return ((struct tnode *) n)->pos == tn->pos + tn->bits; | ||
480 | } | 469 | } |
481 | 470 | ||
482 | static inline void put_child(struct tnode *tn, int i, | 471 | static inline void put_child(struct tnode *tn, int i, |
@@ -514,8 +503,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * | |||
514 | else if (!wasfull && isfull) | 503 | else if (!wasfull && isfull) |
515 | tn->full_children++; | 504 | tn->full_children++; |
516 | 505 | ||
517 | if (n) | 506 | node_set_parent(n, tn); |
518 | node_set_parent(n, tn); | ||
519 | 507 | ||
520 | rcu_assign_pointer(tn->child[i], n); | 508 | rcu_assign_pointer(tn->child[i], n); |
521 | } | 509 | } |
@@ -523,7 +511,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node * | |||
523 | #define MAX_WORK 10 | 511 | #define MAX_WORK 10 |
524 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) | 512 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) |
525 | { | 513 | { |
526 | int i; | 514 | struct rt_trie_node *n = NULL; |
527 | struct tnode *old_tn; | 515 | struct tnode *old_tn; |
528 | int inflate_threshold_use; | 516 | int inflate_threshold_use; |
529 | int halve_threshold_use; | 517 | int halve_threshold_use; |
@@ -536,12 +524,11 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) | |||
536 | tn, inflate_threshold, halve_threshold); | 524 | tn, inflate_threshold, halve_threshold); |
537 | 525 | ||
538 | /* No children */ | 526 | /* No children */ |
539 | if (tn->empty_children == tnode_child_length(tn)) { | 527 | if (tn->empty_children > (tnode_child_length(tn) - 1)) |
540 | tnode_free_safe(tn); | 528 | goto no_children; |
541 | return NULL; | 529 | |
542 | } | ||
543 | /* One child */ | 530 | /* One child */ |
544 | if (tn->empty_children == tnode_child_length(tn) - 1) | 531 | if (tn->empty_children == (tnode_child_length(tn) - 1)) |
545 | goto one_child; | 532 | goto one_child; |
546 | /* | 533 | /* |
547 | * Double as long as the resulting node has a number of | 534 | * Double as long as the resulting node has a number of |
@@ -607,11 +594,9 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) | |||
607 | * | 594 | * |
608 | */ | 595 | */ |
609 | 596 | ||
610 | check_tnode(tn); | ||
611 | |||
612 | /* Keep root node larger */ | 597 | /* Keep root node larger */ |
613 | 598 | ||
614 | if (!node_parent((struct rt_trie_node *)tn)) { | 599 | if (!node_parent(tn)) { |
615 | inflate_threshold_use = inflate_threshold_root; | 600 | inflate_threshold_use = inflate_threshold_root; |
616 | halve_threshold_use = halve_threshold_root; | 601 | halve_threshold_use = halve_threshold_root; |
617 | } else { | 602 | } else { |
@@ -637,8 +622,6 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) | |||
637 | } | 622 | } |
638 | } | 623 | } |
639 | 624 | ||
640 | check_tnode(tn); | ||
641 | |||
642 | /* Return if at least one inflate is run */ | 625 | /* Return if at least one inflate is run */ |
643 | if (max_work != MAX_WORK) | 626 | if (max_work != MAX_WORK) |
644 | return (struct rt_trie_node *) tn; | 627 | return (struct rt_trie_node *) tn; |
@@ -666,21 +649,16 @@ static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) | |||
666 | 649 | ||
667 | 650 | ||
668 | /* Only one child remains */ | 651 | /* Only one child remains */ |
669 | if (tn->empty_children == tnode_child_length(tn) - 1) { | 652 | if (tn->empty_children == (tnode_child_length(tn) - 1)) { |
653 | unsigned long i; | ||
670 | one_child: | 654 | one_child: |
671 | for (i = 0; i < tnode_child_length(tn); i++) { | 655 | for (i = tnode_child_length(tn); !n && i;) |
672 | struct rt_trie_node *n; | 656 | n = tnode_get_child(tn, --i); |
673 | 657 | no_children: | |
674 | n = rtnl_dereference(tn->child[i]); | 658 | /* compress one level */ |
675 | if (!n) | 659 | node_set_parent(n, NULL); |
676 | continue; | 660 | tnode_free_safe(tn); |
677 | 661 | return n; | |
678 | /* compress one level */ | ||
679 | |||
680 | node_set_parent(n, NULL); | ||
681 | tnode_free_safe(tn); | ||
682 | return n; | ||
683 | } | ||
684 | } | 662 | } |
685 | return (struct rt_trie_node *) tn; | 663 | return (struct rt_trie_node *) tn; |
686 | } | 664 | } |
@@ -760,8 +738,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
760 | 738 | ||
761 | /* A leaf or an internal node with skipped bits */ | 739 | /* A leaf or an internal node with skipped bits */ |
762 | 740 | ||
763 | if (IS_LEAF(node) || ((struct tnode *) node)->pos > | 741 | if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) { |
764 | tn->pos + tn->bits - 1) { | ||
765 | put_child(tn, | 742 | put_child(tn, |
766 | tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), | 743 | tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), |
767 | node); | 744 | node); |
@@ -958,11 +935,9 @@ fib_find_node(struct trie *t, u32 key) | |||
958 | pos = 0; | 935 | pos = 0; |
959 | n = rcu_dereference_rtnl(t->trie); | 936 | n = rcu_dereference_rtnl(t->trie); |
960 | 937 | ||
961 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 938 | while (n && IS_TNODE(n)) { |
962 | tn = (struct tnode *) n; | 939 | tn = (struct tnode *) n; |
963 | 940 | ||
964 | check_tnode(tn); | ||
965 | |||
966 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 941 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
967 | pos = tn->pos + tn->bits; | 942 | pos = tn->pos + tn->bits; |
968 | n = tnode_get_child_rcu(tn, | 943 | n = tnode_get_child_rcu(tn, |
@@ -988,7 +963,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
988 | 963 | ||
989 | key = tn->key; | 964 | key = tn->key; |
990 | 965 | ||
991 | while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { | 966 | while (tn != NULL && (tp = node_parent(tn)) != NULL) { |
992 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 967 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
993 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); | 968 | wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); |
994 | tn = (struct tnode *)resize(t, tn); | 969 | tn = (struct tnode *)resize(t, tn); |
@@ -996,7 +971,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) | |||
996 | tnode_put_child_reorg(tp, cindex, | 971 | tnode_put_child_reorg(tp, cindex, |
997 | (struct rt_trie_node *)tn, wasfull); | 972 | (struct rt_trie_node *)tn, wasfull); |
998 | 973 | ||
999 | tp = node_parent((struct rt_trie_node *) tn); | 974 | tp = node_parent(tn); |
1000 | if (!tp) | 975 | if (!tp) |
1001 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); | 976 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
1002 | 977 | ||
@@ -1048,11 +1023,9 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1048 | * If it doesn't, we need to replace it with a T_TNODE. | 1023 | * If it doesn't, we need to replace it with a T_TNODE. |
1049 | */ | 1024 | */ |
1050 | 1025 | ||
1051 | while (n != NULL && NODE_TYPE(n) == T_TNODE) { | 1026 | while (n && IS_TNODE(n)) { |
1052 | tn = (struct tnode *) n; | 1027 | tn = (struct tnode *) n; |
1053 | 1028 | ||
1054 | check_tnode(tn); | ||
1055 | |||
1056 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { | 1029 | if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) { |
1057 | tp = tn; | 1030 | tp = tn; |
1058 | pos = tn->pos + tn->bits; | 1031 | pos = tn->pos + tn->bits; |
@@ -1087,12 +1060,11 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1087 | insert_leaf_info(&l->list, li); | 1060 | insert_leaf_info(&l->list, li); |
1088 | goto done; | 1061 | goto done; |
1089 | } | 1062 | } |
1090 | l = leaf_new(); | 1063 | l = leaf_new(key); |
1091 | 1064 | ||
1092 | if (!l) | 1065 | if (!l) |
1093 | return NULL; | 1066 | return NULL; |
1094 | 1067 | ||
1095 | l->key = key; | ||
1096 | li = leaf_info_new(plen); | 1068 | li = leaf_info_new(plen); |
1097 | 1069 | ||
1098 | if (!li) { | 1070 | if (!li) { |
@@ -1569,7 +1541,7 @@ backtrace: | |||
1569 | if (chopped_off <= pn->bits) { | 1541 | if (chopped_off <= pn->bits) { |
1570 | cindex &= ~(1 << (chopped_off-1)); | 1542 | cindex &= ~(1 << (chopped_off-1)); |
1571 | } else { | 1543 | } else { |
1572 | struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); | 1544 | struct tnode *parent = node_parent_rcu(pn); |
1573 | if (!parent) | 1545 | if (!parent) |
1574 | goto failed; | 1546 | goto failed; |
1575 | 1547 | ||
@@ -1597,7 +1569,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup); | |||
1597 | */ | 1569 | */ |
1598 | static void trie_leaf_remove(struct trie *t, struct leaf *l) | 1570 | static void trie_leaf_remove(struct trie *t, struct leaf *l) |
1599 | { | 1571 | { |
1600 | struct tnode *tp = node_parent((struct rt_trie_node *) l); | 1572 | struct tnode *tp = node_parent(l); |
1601 | 1573 | ||
1602 | pr_debug("entering trie_leaf_remove(%p)\n", l); | 1574 | pr_debug("entering trie_leaf_remove(%p)\n", l); |
1603 | 1575 | ||
@@ -2374,7 +2346,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2374 | 2346 | ||
2375 | if (IS_TNODE(n)) { | 2347 | if (IS_TNODE(n)) { |
2376 | struct tnode *tn = (struct tnode *) n; | 2348 | struct tnode *tn = (struct tnode *) n; |
2377 | __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); | 2349 | __be32 prf = htonl(tn->key); |
2378 | 2350 | ||
2379 | seq_indent(seq, iter->depth-1); | 2351 | seq_indent(seq, iter->depth-1); |
2380 | seq_printf(seq, " +-- %pI4/%d %d %d %d\n", | 2352 | seq_printf(seq, " +-- %pI4/%d %d %d %d\n", |