diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r-- | net/ipv4/fib_trie.c | 110 |
1 files changed, 65 insertions, 45 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 11d4d28190bd..c779ce96e5b5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -126,7 +126,7 @@ struct tnode { | |||
126 | struct work_struct work; | 126 | struct work_struct work; |
127 | struct tnode *tnode_free; | 127 | struct tnode *tnode_free; |
128 | }; | 128 | }; |
129 | struct rt_trie_node *child[0]; | 129 | struct rt_trie_node __rcu *child[0]; |
130 | }; | 130 | }; |
131 | 131 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 132 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,7 +151,7 @@ struct trie_stat { | |||
151 | }; | 151 | }; |
152 | 152 | ||
153 | struct trie { | 153 | struct trie { |
154 | struct rt_trie_node *trie; | 154 | struct rt_trie_node __rcu *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 155 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 156 | struct trie_use_stats stats; |
157 | #endif | 157 | #endif |
@@ -177,16 +177,29 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 177 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 179 | ||
180 | static inline struct tnode *node_parent(struct rt_trie_node *node) | 180 | /* |
181 | * caller must hold RTNL | ||
182 | */ | ||
183 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
181 | { | 184 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 185 | unsigned long parent; |
186 | |||
187 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | ||
188 | |||
189 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
183 | } | 190 | } |
184 | 191 | ||
185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) | 192 | /* |
193 | * caller must hold RCU read lock or RTNL | ||
194 | */ | ||
195 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
186 | { | 196 | { |
187 | struct tnode *ret = node_parent(node); | 197 | unsigned long parent; |
198 | |||
199 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
200 | lockdep_rtnl_is_held()); | ||
188 | 201 | ||
189 | return rcu_dereference_rtnl(ret); | 202 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
190 | } | 203 | } |
191 | 204 | ||
192 | /* Same as rcu_assign_pointer | 205 | /* Same as rcu_assign_pointer |
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | |||
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 211 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 212 | } |
200 | 213 | ||
201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) | 214 | /* |
215 | * caller must hold RTNL | ||
216 | */ | ||
217 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | ||
202 | { | 218 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 219 | BUG_ON(i >= 1U << tn->bits); |
204 | 220 | ||
205 | return tn->child[i]; | 221 | return rtnl_dereference(tn->child[i]); |
206 | } | 222 | } |
207 | 223 | ||
208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 224 | /* |
225 | * caller must hold RCU read lock or RTNL | ||
226 | */ | ||
227 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | ||
209 | { | 228 | { |
210 | struct rt_trie_node *ret = tnode_get_child(tn, i); | 229 | BUG_ON(i >= 1U << tn->bits); |
211 | 230 | ||
212 | return rcu_dereference_rtnl(ret); | 231 | return rcu_dereference_rtnl(tn->child[i]); |
213 | } | 232 | } |
214 | 233 | ||
215 | static inline int tnode_child_length(const struct tnode *tn) | 234 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -482,7 +501,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
482 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, | 501 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
483 | int wasfull) | 502 | int wasfull) |
484 | { | 503 | { |
485 | struct rt_trie_node *chi = tn->child[i]; | 504 | struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); |
486 | int isfull; | 505 | int isfull; |
487 | 506 | ||
488 | BUG_ON(i >= 1<<tn->bits); | 507 | BUG_ON(i >= 1<<tn->bits); |
@@ -660,7 +679,7 @@ one_child: | |||
660 | for (i = 0; i < tnode_child_length(tn); i++) { | 679 | for (i = 0; i < tnode_child_length(tn); i++) { |
661 | struct rt_trie_node *n; | 680 | struct rt_trie_node *n; |
662 | 681 | ||
663 | n = tn->child[i]; | 682 | n = rtnl_dereference(tn->child[i]); |
664 | if (!n) | 683 | if (!n) |
665 | continue; | 684 | continue; |
666 | 685 | ||
@@ -674,6 +693,20 @@ one_child: | |||
674 | return (struct rt_trie_node *) tn; | 693 | return (struct rt_trie_node *) tn; |
675 | } | 694 | } |
676 | 695 | ||
696 | |||
697 | static void tnode_clean_free(struct tnode *tn) | ||
698 | { | ||
699 | int i; | ||
700 | struct tnode *tofree; | ||
701 | |||
702 | for (i = 0; i < tnode_child_length(tn); i++) { | ||
703 | tofree = (struct tnode *)rtnl_dereference(tn->child[i]); | ||
704 | if (tofree) | ||
705 | tnode_free(tofree); | ||
706 | } | ||
707 | tnode_free(tn); | ||
708 | } | ||
709 | |||
677 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 710 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
678 | { | 711 | { |
679 | struct tnode *oldtnode = tn; | 712 | struct tnode *oldtnode = tn; |
@@ -750,8 +783,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
750 | inode = (struct tnode *) node; | 783 | inode = (struct tnode *) node; |
751 | 784 | ||
752 | if (inode->bits == 1) { | 785 | if (inode->bits == 1) { |
753 | put_child(t, tn, 2*i, inode->child[0]); | 786 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); |
754 | put_child(t, tn, 2*i+1, inode->child[1]); | 787 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); |
755 | 788 | ||
756 | tnode_free_safe(inode); | 789 | tnode_free_safe(inode); |
757 | continue; | 790 | continue; |
@@ -792,8 +825,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
792 | 825 | ||
793 | size = tnode_child_length(left); | 826 | size = tnode_child_length(left); |
794 | for (j = 0; j < size; j++) { | 827 | for (j = 0; j < size; j++) { |
795 | put_child(t, left, j, inode->child[j]); | 828 | put_child(t, left, j, rtnl_dereference(inode->child[j])); |
796 | put_child(t, right, j, inode->child[j + size]); | 829 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); |
797 | } | 830 | } |
798 | put_child(t, tn, 2*i, resize(t, left)); | 831 | put_child(t, tn, 2*i, resize(t, left)); |
799 | put_child(t, tn, 2*i+1, resize(t, right)); | 832 | put_child(t, tn, 2*i+1, resize(t, right)); |
@@ -803,18 +836,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
803 | tnode_free_safe(oldtnode); | 836 | tnode_free_safe(oldtnode); |
804 | return tn; | 837 | return tn; |
805 | nomem: | 838 | nomem: |
806 | { | 839 | tnode_clean_free(tn); |
807 | int size = tnode_child_length(tn); | 840 | return ERR_PTR(-ENOMEM); |
808 | int j; | ||
809 | |||
810 | for (j = 0; j < size; j++) | ||
811 | if (tn->child[j]) | ||
812 | tnode_free((struct tnode *)tn->child[j]); | ||
813 | |||
814 | tnode_free(tn); | ||
815 | |||
816 | return ERR_PTR(-ENOMEM); | ||
817 | } | ||
818 | } | 841 | } |
819 | 842 | ||
820 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 843 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
@@ -885,18 +908,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
885 | tnode_free_safe(oldtnode); | 908 | tnode_free_safe(oldtnode); |
886 | return tn; | 909 | return tn; |
887 | nomem: | 910 | nomem: |
888 | { | 911 | tnode_clean_free(tn); |
889 | int size = tnode_child_length(tn); | 912 | return ERR_PTR(-ENOMEM); |
890 | int j; | ||
891 | |||
892 | for (j = 0; j < size; j++) | ||
893 | if (tn->child[j]) | ||
894 | tnode_free((struct tnode *)tn->child[j]); | ||
895 | |||
896 | tnode_free(tn); | ||
897 | |||
898 | return ERR_PTR(-ENOMEM); | ||
899 | } | ||
900 | } | 913 | } |
901 | 914 | ||
902 | /* readside must use rcu_read_lock currently dump routines | 915 | /* readside must use rcu_read_lock currently dump routines |
@@ -1028,7 +1041,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1028 | t_key cindex; | 1041 | t_key cindex; |
1029 | 1042 | ||
1030 | pos = 0; | 1043 | pos = 0; |
1031 | n = t->trie; | 1044 | n = rtnl_dereference(t->trie); |
1032 | 1045 | ||
1033 | /* If we point to NULL, stop. Either the tree is empty and we should | 1046 | /* If we point to NULL, stop. Either the tree is empty and we should |
1034 | * just put a new leaf in if, or we have reached an empty child slot, | 1047 | * just put a new leaf in if, or we have reached an empty child slot, |
@@ -1314,6 +1327,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1314 | } | 1327 | } |
1315 | } | 1328 | } |
1316 | 1329 | ||
1330 | if (!plen) | ||
1331 | tb->tb_num_default++; | ||
1332 | |||
1317 | list_add_tail_rcu(&new_fa->fa_list, | 1333 | list_add_tail_rcu(&new_fa->fa_list, |
1318 | (fa ? &fa->fa_list : fa_head)); | 1334 | (fa ? &fa->fa_list : fa_head)); |
1319 | 1335 | ||
@@ -1679,6 +1695,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1679 | 1695 | ||
1680 | list_del_rcu(&fa->fa_list); | 1696 | list_del_rcu(&fa->fa_list); |
1681 | 1697 | ||
1698 | if (!plen) | ||
1699 | tb->tb_num_default--; | ||
1700 | |||
1682 | if (list_empty(fa_head)) { | 1701 | if (list_empty(fa_head)) { |
1683 | hlist_del_rcu(&li->hlist); | 1702 | hlist_del_rcu(&li->hlist); |
1684 | free_leaf_info(li); | 1703 | free_leaf_info(li); |
@@ -1751,7 +1770,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) | |||
1751 | continue; | 1770 | continue; |
1752 | 1771 | ||
1753 | if (IS_LEAF(c)) { | 1772 | if (IS_LEAF(c)) { |
1754 | prefetch(p->child[idx]); | 1773 | prefetch(rcu_dereference_rtnl(p->child[idx])); |
1755 | return (struct leaf *) c; | 1774 | return (struct leaf *) c; |
1756 | } | 1775 | } |
1757 | 1776 | ||
@@ -1969,6 +1988,7 @@ struct fib_table *fib_trie_table(u32 id) | |||
1969 | 1988 | ||
1970 | tb->tb_id = id; | 1989 | tb->tb_id = id; |
1971 | tb->tb_default = -1; | 1990 | tb->tb_default = -1; |
1991 | tb->tb_num_default = 0; | ||
1972 | 1992 | ||
1973 | t = (struct trie *) tb->tb_data; | 1993 | t = (struct trie *) tb->tb_data; |
1974 | memset(t, 0, sizeof(*t)); | 1994 | memset(t, 0, sizeof(*t)); |
@@ -2264,7 +2284,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2264 | 2284 | ||
2265 | /* walk rest of this hash chain */ | 2285 | /* walk rest of this hash chain */ |
2266 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); | 2286 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2267 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { | 2287 | while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { |
2268 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | 2288 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); |
2269 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | 2289 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); |
2270 | if (n) | 2290 | if (n) |