diff options
Diffstat (limited to 'net/ipv4/fib_trie.c')
-rw-r--r-- | net/ipv4/fib_trie.c | 111 |
1 files changed, 66 insertions, 45 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 11d4d28190bd..58c25ea5a5c1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -72,6 +72,7 @@ | |||
72 | #include <linux/init.h> | 72 | #include <linux/init.h> |
73 | #include <linux/list.h> | 73 | #include <linux/list.h> |
74 | #include <linux/slab.h> | 74 | #include <linux/slab.h> |
75 | #include <linux/prefetch.h> | ||
75 | #include <net/net_namespace.h> | 76 | #include <net/net_namespace.h> |
76 | #include <net/ip.h> | 77 | #include <net/ip.h> |
77 | #include <net/protocol.h> | 78 | #include <net/protocol.h> |
@@ -126,7 +127,7 @@ struct tnode { | |||
126 | struct work_struct work; | 127 | struct work_struct work; |
127 | struct tnode *tnode_free; | 128 | struct tnode *tnode_free; |
128 | }; | 129 | }; |
129 | struct rt_trie_node *child[0]; | 130 | struct rt_trie_node __rcu *child[0]; |
130 | }; | 131 | }; |
131 | 132 | ||
132 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 133 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -151,7 +152,7 @@ struct trie_stat { | |||
151 | }; | 152 | }; |
152 | 153 | ||
153 | struct trie { | 154 | struct trie { |
154 | struct rt_trie_node *trie; | 155 | struct rt_trie_node __rcu *trie; |
155 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 156 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
156 | struct trie_use_stats stats; | 157 | struct trie_use_stats stats; |
157 | #endif | 158 | #endif |
@@ -177,16 +178,29 @@ static const int sync_pages = 128; | |||
177 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 178 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
178 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 179 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
179 | 180 | ||
180 | static inline struct tnode *node_parent(struct rt_trie_node *node) | 181 | /* |
182 | * caller must hold RTNL | ||
183 | */ | ||
184 | static inline struct tnode *node_parent(const struct rt_trie_node *node) | ||
181 | { | 185 | { |
182 | return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); | 186 | unsigned long parent; |
187 | |||
188 | parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held()); | ||
189 | |||
190 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); | ||
183 | } | 191 | } |
184 | 192 | ||
185 | static inline struct tnode *node_parent_rcu(struct rt_trie_node *node) | 193 | /* |
194 | * caller must hold RCU read lock or RTNL | ||
195 | */ | ||
196 | static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node) | ||
186 | { | 197 | { |
187 | struct tnode *ret = node_parent(node); | 198 | unsigned long parent; |
199 | |||
200 | parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() || | ||
201 | lockdep_rtnl_is_held()); | ||
188 | 202 | ||
189 | return rcu_dereference_rtnl(ret); | 203 | return (struct tnode *)(parent & ~NODE_TYPE_MASK); |
190 | } | 204 | } |
191 | 205 | ||
192 | /* Same as rcu_assign_pointer | 206 | /* Same as rcu_assign_pointer |
@@ -198,18 +212,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) | |||
198 | node->parent = (unsigned long)ptr | NODE_TYPE(node); | 212 | node->parent = (unsigned long)ptr | NODE_TYPE(node); |
199 | } | 213 | } |
200 | 214 | ||
201 | static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) | 215 | /* |
216 | * caller must hold RTNL | ||
217 | */ | ||
218 | static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i) | ||
202 | { | 219 | { |
203 | BUG_ON(i >= 1U << tn->bits); | 220 | BUG_ON(i >= 1U << tn->bits); |
204 | 221 | ||
205 | return tn->child[i]; | 222 | return rtnl_dereference(tn->child[i]); |
206 | } | 223 | } |
207 | 224 | ||
208 | static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) | 225 | /* |
226 | * caller must hold RCU read lock or RTNL | ||
227 | */ | ||
228 | static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i) | ||
209 | { | 229 | { |
210 | struct rt_trie_node *ret = tnode_get_child(tn, i); | 230 | BUG_ON(i >= 1U << tn->bits); |
211 | 231 | ||
212 | return rcu_dereference_rtnl(ret); | 232 | return rcu_dereference_rtnl(tn->child[i]); |
213 | } | 233 | } |
214 | 234 | ||
215 | static inline int tnode_child_length(const struct tnode *tn) | 235 | static inline int tnode_child_length(const struct tnode *tn) |
@@ -482,7 +502,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, | |||
482 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, | 502 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
483 | int wasfull) | 503 | int wasfull) |
484 | { | 504 | { |
485 | struct rt_trie_node *chi = tn->child[i]; | 505 | struct rt_trie_node *chi = rtnl_dereference(tn->child[i]); |
486 | int isfull; | 506 | int isfull; |
487 | 507 | ||
488 | BUG_ON(i >= 1<<tn->bits); | 508 | BUG_ON(i >= 1<<tn->bits); |
@@ -660,7 +680,7 @@ one_child: | |||
660 | for (i = 0; i < tnode_child_length(tn); i++) { | 680 | for (i = 0; i < tnode_child_length(tn); i++) { |
661 | struct rt_trie_node *n; | 681 | struct rt_trie_node *n; |
662 | 682 | ||
663 | n = tn->child[i]; | 683 | n = rtnl_dereference(tn->child[i]); |
664 | if (!n) | 684 | if (!n) |
665 | continue; | 685 | continue; |
666 | 686 | ||
@@ -674,6 +694,20 @@ one_child: | |||
674 | return (struct rt_trie_node *) tn; | 694 | return (struct rt_trie_node *) tn; |
675 | } | 695 | } |
676 | 696 | ||
697 | |||
698 | static void tnode_clean_free(struct tnode *tn) | ||
699 | { | ||
700 | int i; | ||
701 | struct tnode *tofree; | ||
702 | |||
703 | for (i = 0; i < tnode_child_length(tn); i++) { | ||
704 | tofree = (struct tnode *)rtnl_dereference(tn->child[i]); | ||
705 | if (tofree) | ||
706 | tnode_free(tofree); | ||
707 | } | ||
708 | tnode_free(tn); | ||
709 | } | ||
710 | |||
677 | static struct tnode *inflate(struct trie *t, struct tnode *tn) | 711 | static struct tnode *inflate(struct trie *t, struct tnode *tn) |
678 | { | 712 | { |
679 | struct tnode *oldtnode = tn; | 713 | struct tnode *oldtnode = tn; |
@@ -750,8 +784,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
750 | inode = (struct tnode *) node; | 784 | inode = (struct tnode *) node; |
751 | 785 | ||
752 | if (inode->bits == 1) { | 786 | if (inode->bits == 1) { |
753 | put_child(t, tn, 2*i, inode->child[0]); | 787 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); |
754 | put_child(t, tn, 2*i+1, inode->child[1]); | 788 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); |
755 | 789 | ||
756 | tnode_free_safe(inode); | 790 | tnode_free_safe(inode); |
757 | continue; | 791 | continue; |
@@ -792,8 +826,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
792 | 826 | ||
793 | size = tnode_child_length(left); | 827 | size = tnode_child_length(left); |
794 | for (j = 0; j < size; j++) { | 828 | for (j = 0; j < size; j++) { |
795 | put_child(t, left, j, inode->child[j]); | 829 | put_child(t, left, j, rtnl_dereference(inode->child[j])); |
796 | put_child(t, right, j, inode->child[j + size]); | 830 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); |
797 | } | 831 | } |
798 | put_child(t, tn, 2*i, resize(t, left)); | 832 | put_child(t, tn, 2*i, resize(t, left)); |
799 | put_child(t, tn, 2*i+1, resize(t, right)); | 833 | put_child(t, tn, 2*i+1, resize(t, right)); |
@@ -803,18 +837,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
803 | tnode_free_safe(oldtnode); | 837 | tnode_free_safe(oldtnode); |
804 | return tn; | 838 | return tn; |
805 | nomem: | 839 | nomem: |
806 | { | 840 | tnode_clean_free(tn); |
807 | int size = tnode_child_length(tn); | 841 | return ERR_PTR(-ENOMEM); |
808 | int j; | ||
809 | |||
810 | for (j = 0; j < size; j++) | ||
811 | if (tn->child[j]) | ||
812 | tnode_free((struct tnode *)tn->child[j]); | ||
813 | |||
814 | tnode_free(tn); | ||
815 | |||
816 | return ERR_PTR(-ENOMEM); | ||
817 | } | ||
818 | } | 842 | } |
819 | 843 | ||
820 | static struct tnode *halve(struct trie *t, struct tnode *tn) | 844 | static struct tnode *halve(struct trie *t, struct tnode *tn) |
@@ -885,18 +909,8 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
885 | tnode_free_safe(oldtnode); | 909 | tnode_free_safe(oldtnode); |
886 | return tn; | 910 | return tn; |
887 | nomem: | 911 | nomem: |
888 | { | 912 | tnode_clean_free(tn); |
889 | int size = tnode_child_length(tn); | 913 | return ERR_PTR(-ENOMEM); |
890 | int j; | ||
891 | |||
892 | for (j = 0; j < size; j++) | ||
893 | if (tn->child[j]) | ||
894 | tnode_free((struct tnode *)tn->child[j]); | ||
895 | |||
896 | tnode_free(tn); | ||
897 | |||
898 | return ERR_PTR(-ENOMEM); | ||
899 | } | ||
900 | } | 914 | } |
901 | 915 | ||
902 | /* readside must use rcu_read_lock currently dump routines | 916 | /* readside must use rcu_read_lock currently dump routines |
@@ -1028,7 +1042,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
1028 | t_key cindex; | 1042 | t_key cindex; |
1029 | 1043 | ||
1030 | pos = 0; | 1044 | pos = 0; |
1031 | n = t->trie; | 1045 | n = rtnl_dereference(t->trie); |
1032 | 1046 | ||
1033 | /* If we point to NULL, stop. Either the tree is empty and we should | 1047 | /* If we point to NULL, stop. Either the tree is empty and we should |
1034 | * just put a new leaf in if, or we have reached an empty child slot, | 1048 | * just put a new leaf in if, or we have reached an empty child slot, |
@@ -1314,6 +1328,9 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) | |||
1314 | } | 1328 | } |
1315 | } | 1329 | } |
1316 | 1330 | ||
1331 | if (!plen) | ||
1332 | tb->tb_num_default++; | ||
1333 | |||
1317 | list_add_tail_rcu(&new_fa->fa_list, | 1334 | list_add_tail_rcu(&new_fa->fa_list, |
1318 | (fa ? &fa->fa_list : fa_head)); | 1335 | (fa ? &fa->fa_list : fa_head)); |
1319 | 1336 | ||
@@ -1679,6 +1696,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) | |||
1679 | 1696 | ||
1680 | list_del_rcu(&fa->fa_list); | 1697 | list_del_rcu(&fa->fa_list); |
1681 | 1698 | ||
1699 | if (!plen) | ||
1700 | tb->tb_num_default--; | ||
1701 | |||
1682 | if (list_empty(fa_head)) { | 1702 | if (list_empty(fa_head)) { |
1683 | hlist_del_rcu(&li->hlist); | 1703 | hlist_del_rcu(&li->hlist); |
1684 | free_leaf_info(li); | 1704 | free_leaf_info(li); |
@@ -1751,7 +1771,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) | |||
1751 | continue; | 1771 | continue; |
1752 | 1772 | ||
1753 | if (IS_LEAF(c)) { | 1773 | if (IS_LEAF(c)) { |
1754 | prefetch(p->child[idx]); | 1774 | prefetch(rcu_dereference_rtnl(p->child[idx])); |
1755 | return (struct leaf *) c; | 1775 | return (struct leaf *) c; |
1756 | } | 1776 | } |
1757 | 1777 | ||
@@ -1969,6 +1989,7 @@ struct fib_table *fib_trie_table(u32 id) | |||
1969 | 1989 | ||
1970 | tb->tb_id = id; | 1990 | tb->tb_id = id; |
1971 | tb->tb_default = -1; | 1991 | tb->tb_default = -1; |
1992 | tb->tb_num_default = 0; | ||
1972 | 1993 | ||
1973 | t = (struct trie *) tb->tb_data; | 1994 | t = (struct trie *) tb->tb_data; |
1974 | memset(t, 0, sizeof(*t)); | 1995 | memset(t, 0, sizeof(*t)); |
@@ -2264,7 +2285,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2264 | 2285 | ||
2265 | /* walk rest of this hash chain */ | 2286 | /* walk rest of this hash chain */ |
2266 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); | 2287 | h = tb->tb_id & (FIB_TABLE_HASHSZ - 1); |
2267 | while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) { | 2288 | while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) { |
2268 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); | 2289 | tb = hlist_entry(tb_node, struct fib_table, tb_hlist); |
2269 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); | 2290 | n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); |
2270 | if (n) | 2291 | if (n) |